diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..0a60039b22366535809bda1335b3905c3257591a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,84 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-105/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-110/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-115/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-125/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-130/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-135/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-145/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-15/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-155/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-160/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-165/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-170/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-175/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-180/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-185/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-190/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-195/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-205/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-210/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-215/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-220/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-225/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-230/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-235/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-245/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-25/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-255/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-260/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-265/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-270/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-275/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-280/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-285/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-290/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-295/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-305/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-310/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-315/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-320/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-325/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-330/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-335/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-340/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-345/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-35/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-355/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-360/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-365/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-370/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-375/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-380/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-385/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-390/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-395/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-397/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-45/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-55/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-65/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-70/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-75/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-85/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoints/checkpoint-95/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f61cc47ce3c05b6efea2ce89b199dc0a5cff9aa --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23aa3581cae20d89bd01cf7c5cbb8d532c318b9b1044bddb7f14ca77fec9b409 +size 74016 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-10/README.md b/checkpoints/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-10/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-10/adapter_config.json b/checkpoints/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-10/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-10/adapter_model.safetensors b/checkpoints/checkpoint-10/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7be5c3fd7807df9a54634f31e1bddad3a8cfd927 --- /dev/null +++ b/checkpoints/checkpoint-10/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85853e356bd4ca923c180ddafa50a96ffb79a9bd2e31bd9e8aca612746585ce +size 74016 diff --git a/checkpoints/checkpoint-10/chat_template.jinja b/checkpoints/checkpoint-10/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-10/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-10/optimizer.pt b/checkpoints/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4c62bbcd43f515ac78b3b22defd00165d0295ef --- /dev/null +++ b/checkpoints/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c1c9eedd7c14b1ef4cff251cff502ed133a61f7a8eaa44950ecca483e105be +size 43813 diff --git a/checkpoints/checkpoint-10/rng_state.pth b/checkpoints/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-10/scheduler.pt b/checkpoints/checkpoint-10/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ee408506b56aa6f7290a6b3ecb2c8ec7ec80402 --- /dev/null +++ b/checkpoints/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80fb384c7c6b2fc4b769cc3ff7aa3e3b89127590ab5ff19e1d709da4b70c9ca +size 1465 diff --git a/checkpoints/checkpoint-10/special_tokens_map.json b/checkpoints/checkpoint-10/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-10/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-10/tokenizer.json b/checkpoints/checkpoint-10/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-10/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-10/tokenizer_config.json b/checkpoints/checkpoint-10/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-10/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-10/trainer_state.json b/checkpoints/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..021c281317ce9b7feb9f685a531d0fb947754fe8 --- /dev/null +++ b/checkpoints/checkpoint-10/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.025220680958385876, + "eval_steps": 100, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1121328559054848.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-10/training_args.bin b/checkpoints/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-100/README.md b/checkpoints/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-100/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-100/adapter_config.json b/checkpoints/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-100/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-100/adapter_model.safetensors b/checkpoints/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d22b61fcc028869c47c26d9b3eab6351834e4255 --- /dev/null +++ b/checkpoints/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62964ca18540988c10df78c5d137cb7ec39b9f55b345abd7763e305b3ed0b1e6 +size 74016 diff --git a/checkpoints/checkpoint-100/chat_template.jinja b/checkpoints/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-100/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-100/optimizer.pt b/checkpoints/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dd1060f17fcad12899030a396ec405c6edb9c4a --- /dev/null +++ b/checkpoints/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc02f1902722bf8dc6d60d5df9db4829c210e71c23d4020ce3b039450479e96f +size 43813 diff --git a/checkpoints/checkpoint-100/rng_state.pth b/checkpoints/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-100/scheduler.pt b/checkpoints/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a2dc200a29e89a655f4825e0c79149c8a1651ce --- /dev/null +++ b/checkpoints/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6702d7b06eb26cdff4035f420aa5101b754c0723ec91a902ea6577e82c1ee7c +size 1465 diff --git a/checkpoints/checkpoint-100/special_tokens_map.json b/checkpoints/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-100/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-100/tokenizer.json b/checkpoints/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-100/tokenizer_config.json b/checkpoints/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-100/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-100/trainer_state.json b/checkpoints/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ab6e46f803c547cf0347f57b2026767ec1ff8b1e --- /dev/null +++ b/checkpoints/checkpoint-100/trainer_state.json @@ -0,0 +1,742 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.25220680958385877, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1174830262083584e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-100/training_args.bin b/checkpoints/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-105/README.md b/checkpoints/checkpoint-105/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-105/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-105/adapter_config.json b/checkpoints/checkpoint-105/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-105/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-105/adapter_model.safetensors b/checkpoints/checkpoint-105/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b031b4a54bdc062bbf7faacbbec4e8b67c5b60cf --- /dev/null +++ b/checkpoints/checkpoint-105/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66627b528fff8a98452316266ce756c4e5623f5c6afe896c603e34cbdc09af46 +size 74016 diff --git a/checkpoints/checkpoint-105/chat_template.jinja b/checkpoints/checkpoint-105/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-105/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-105/optimizer.pt b/checkpoints/checkpoint-105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..daf2137497978fd81b62a9ea65a0cd8399ad1698 --- /dev/null +++ b/checkpoints/checkpoint-105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887f549f759bcda13bc6c84ad495729888797a555d7324e0d9068dffec8160a0 +size 43813 diff --git a/checkpoints/checkpoint-105/rng_state.pth b/checkpoints/checkpoint-105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-105/scheduler.pt b/checkpoints/checkpoint-105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f69e3ce78174d794ef60c82737881120825f527 --- /dev/null +++ b/checkpoints/checkpoint-105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23196ceb128dfeac75d9e88e0b83f09fd1aae87aaf62d20198926850be8262bb +size 1465 diff --git a/checkpoints/checkpoint-105/special_tokens_map.json b/checkpoints/checkpoint-105/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-105/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-105/tokenizer.json b/checkpoints/checkpoint-105/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-105/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-105/tokenizer_config.json b/checkpoints/checkpoint-105/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-105/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-105/trainer_state.json b/checkpoints/checkpoint-105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..585e1b9034e4a4755566cccb9073ecf73fdd4cfd --- /dev/null +++ b/checkpoints/checkpoint-105/trainer_state.json @@ -0,0 +1,777 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2648171500630517, + "eval_steps": 100, + "global_step": 105, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.174283437596672e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-105/training_args.bin b/checkpoints/checkpoint-105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-110/README.md b/checkpoints/checkpoint-110/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-110/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-110/adapter_config.json b/checkpoints/checkpoint-110/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-110/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-110/adapter_model.safetensors b/checkpoints/checkpoint-110/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd81224b7e984f737e74714d73cc63f7fb150676 --- /dev/null +++ b/checkpoints/checkpoint-110/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4395f5cbc5e68f4b580ccf30b59e15720e97d499a4de94861a41183b53d2381 +size 74016 diff --git a/checkpoints/checkpoint-110/chat_template.jinja b/checkpoints/checkpoint-110/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-110/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-110/optimizer.pt b/checkpoints/checkpoint-110/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c5f75a163db1aeb5160215ab6b555950d20ad08 --- /dev/null +++ b/checkpoints/checkpoint-110/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c9bcdf69e166eef93be6a408692b9ec73d3f3477ac255a2915393f7606be048 +size 43813 diff --git a/checkpoints/checkpoint-110/rng_state.pth b/checkpoints/checkpoint-110/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-110/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-110/scheduler.pt b/checkpoints/checkpoint-110/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b50f5d13ca7b80bdb9c6487ad3fb04d269d6e7d3 --- /dev/null +++ b/checkpoints/checkpoint-110/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c579670112e91f1ab565d1c7f636a9a46e1705f4efd7df06429ef02890e532f0 +size 1465 diff --git a/checkpoints/checkpoint-110/special_tokens_map.json b/checkpoints/checkpoint-110/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-110/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-110/tokenizer.json b/checkpoints/checkpoint-110/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-110/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-110/tokenizer_config.json b/checkpoints/checkpoint-110/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-110/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-110/trainer_state.json b/checkpoints/checkpoint-110/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4f2220e82a89c2cef9490d2a56f7ea31bc1cd4b0 --- /dev/null +++ b/checkpoints/checkpoint-110/trainer_state.json @@ -0,0 +1,812 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.27742749054224464, + "eval_steps": 100, + "global_step": 110, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.231471104049152e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-110/training_args.bin b/checkpoints/checkpoint-110/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-110/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-115/README.md b/checkpoints/checkpoint-115/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-115/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-115/adapter_config.json b/checkpoints/checkpoint-115/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-115/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-115/adapter_model.safetensors b/checkpoints/checkpoint-115/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d62583baaeb1144f569d7dc1b41b962cbca18bb --- /dev/null +++ b/checkpoints/checkpoint-115/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b3cb7871b8614161df9aa61f34254ad0e0e9c68ca377e42c287f3219d5decb +size 74016 diff --git a/checkpoints/checkpoint-115/chat_template.jinja b/checkpoints/checkpoint-115/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-115/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-115/optimizer.pt b/checkpoints/checkpoint-115/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..766ac5c8afcf6cf14651c9620d4908f66df6af5c --- /dev/null +++ b/checkpoints/checkpoint-115/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97910537fb6ee9fe4f2b852473700f58facedaa335b493b297bd8943407b24f2 +size 43813 diff --git a/checkpoints/checkpoint-115/rng_state.pth b/checkpoints/checkpoint-115/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-115/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-115/scheduler.pt b/checkpoints/checkpoint-115/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..05280b44b2eed8b7b162c114205da15836a6a2fa --- /dev/null +++ b/checkpoints/checkpoint-115/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f931c3c0e8213cc0b88d10e7af5ed0e4c52d5d46e69d4fb2fc5ee5813b3b0b90 +size 1465 diff --git a/checkpoints/checkpoint-115/special_tokens_map.json b/checkpoints/checkpoint-115/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-115/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-115/tokenizer.json b/checkpoints/checkpoint-115/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-115/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-115/tokenizer_config.json b/checkpoints/checkpoint-115/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-115/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-115/trainer_state.json b/checkpoints/checkpoint-115/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7880fe81208540aa314a80b86caadaf3a4c6e42a --- /dev/null +++ b/checkpoints/checkpoint-115/trainer_state.json @@ -0,0 +1,847 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2900378310214376, + "eval_steps": 100, + "global_step": 115, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2873619163332608e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-115/training_args.bin b/checkpoints/checkpoint-115/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-115/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-120/README.md b/checkpoints/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-120/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-120/adapter_config.json b/checkpoints/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-120/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-120/adapter_model.safetensors b/checkpoints/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88a73b48496f7a0a57112d48485fa64b9449f4bb --- /dev/null +++ b/checkpoints/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1909609ae6c2893ca26d56385a835fed6b13a753079688fbdfa7c84e30e6fcf1 +size 74016 diff --git a/checkpoints/checkpoint-120/chat_template.jinja b/checkpoints/checkpoint-120/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-120/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-120/optimizer.pt b/checkpoints/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..933c764ae007ac6effe7865f2016912f9e53e16f --- /dev/null +++ b/checkpoints/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf479914185acae1ef16063ff907ce0df904944f1619bd50f8c4f12d59f2d7c4 +size 43813 diff --git a/checkpoints/checkpoint-120/rng_state.pth b/checkpoints/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-120/scheduler.pt b/checkpoints/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b180ad2cdd33e489dde6b95e595884514dcdf2e --- /dev/null +++ b/checkpoints/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f837a3c6fcb33b37029da4c29789a9428e4c4b51508d6cf78aac3e04bd26b5d +size 1465 diff --git a/checkpoints/checkpoint-120/special_tokens_map.json b/checkpoints/checkpoint-120/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-120/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-120/tokenizer.json b/checkpoints/checkpoint-120/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-120/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-120/tokenizer_config.json b/checkpoints/checkpoint-120/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-120/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-120/trainer_state.json b/checkpoints/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f32b725bcea33b2f4e8ba6dcace96196361c1515 --- /dev/null +++ b/checkpoints/checkpoint-120/trainer_state.json @@ -0,0 +1,882 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3026481715006305, + "eval_steps": 100, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3431356515049472e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-120/training_args.bin b/checkpoints/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-125/README.md b/checkpoints/checkpoint-125/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-125/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-125/adapter_config.json b/checkpoints/checkpoint-125/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-125/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-125/adapter_model.safetensors b/checkpoints/checkpoint-125/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb02037d500618f4dcb9cea55221a4c581ace808 --- /dev/null +++ b/checkpoints/checkpoint-125/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28bf20ca0154ca0d1ed47dab8b659a4bdc47a8213c56d9ea763fc35d730cdf57 +size 74016 diff --git a/checkpoints/checkpoint-125/chat_template.jinja b/checkpoints/checkpoint-125/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-125/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-125/optimizer.pt b/checkpoints/checkpoint-125/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4e295cf724bccabf4eeae8150f2262321171ff0 --- /dev/null +++ b/checkpoints/checkpoint-125/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b443b605fe8cbdb4ce0d097a168b7b2e3734a30f64ec0f1ffe4ba13a60a6637b +size 43813 diff --git a/checkpoints/checkpoint-125/rng_state.pth b/checkpoints/checkpoint-125/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-125/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-125/scheduler.pt b/checkpoints/checkpoint-125/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..286bc230bcfe6adde52992790508943ffb21959e --- /dev/null +++ b/checkpoints/checkpoint-125/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08651481ddfe04a5ff5ef20c3a12f57cc7f296fefff021e822e97b0e6f542aea +size 1465 diff --git a/checkpoints/checkpoint-125/special_tokens_map.json b/checkpoints/checkpoint-125/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-125/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-125/tokenizer.json b/checkpoints/checkpoint-125/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-125/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-125/tokenizer_config.json b/checkpoints/checkpoint-125/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-125/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-125/trainer_state.json b/checkpoints/checkpoint-125/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b175845278079ff91423bdcea3da60f26aa4f30b --- /dev/null +++ b/checkpoints/checkpoint-125/trainer_state.json @@ -0,0 +1,917 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.31525851197982346, + "eval_steps": 100, + "global_step": 125, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.399791967985664e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-125/training_args.bin b/checkpoints/checkpoint-125/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-125/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-130/README.md b/checkpoints/checkpoint-130/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-130/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-130/adapter_config.json b/checkpoints/checkpoint-130/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-130/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-130/adapter_model.safetensors b/checkpoints/checkpoint-130/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dc83e316f84ebc06cb7c2e19c39038d39d333cf --- /dev/null +++ b/checkpoints/checkpoint-130/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb1e8954c1629df4a35f61173027d1b8a4c7f7586434bb626fe077684357a0ed +size 74016 diff --git a/checkpoints/checkpoint-130/chat_template.jinja b/checkpoints/checkpoint-130/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-130/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-130/optimizer.pt b/checkpoints/checkpoint-130/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..244fb991f8ba19af1a735bec343b9819f383212b --- /dev/null +++ b/checkpoints/checkpoint-130/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280b1f7e4fa17e0aedd832764a13d0c059d0955889c05122fad338367c38db56 +size 43813 diff --git a/checkpoints/checkpoint-130/rng_state.pth b/checkpoints/checkpoint-130/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-130/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-130/scheduler.pt b/checkpoints/checkpoint-130/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..41508daf3d9118f3aee6b5e02c54c4c7f3122f54 --- /dev/null +++ b/checkpoints/checkpoint-130/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2940337cf97010db8176d334bcbb9aa0df5ae47aa1f960bdef2469f602d5f30 +size 1465 diff --git a/checkpoints/checkpoint-130/special_tokens_map.json b/checkpoints/checkpoint-130/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-130/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-130/tokenizer.json b/checkpoints/checkpoint-130/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-130/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-130/tokenizer_config.json b/checkpoints/checkpoint-130/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-130/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-130/trainer_state.json b/checkpoints/checkpoint-130/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ada993f0236c11d2150cd91b52fe57f0fefcd52c --- /dev/null +++ b/checkpoints/checkpoint-130/trainer_state.json @@ -0,0 +1,952 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.32786885245901637, + "eval_steps": 100, + "global_step": 130, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.456124070924288e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-130/training_args.bin b/checkpoints/checkpoint-130/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-130/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-135/README.md b/checkpoints/checkpoint-135/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-135/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-135/adapter_config.json b/checkpoints/checkpoint-135/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-135/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-135/adapter_model.safetensors b/checkpoints/checkpoint-135/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8172bf797f97338702b5e31dc90573ec827884a3 --- /dev/null +++ b/checkpoints/checkpoint-135/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef289a2cd2ed57682d1e6cdda51c8503f2ed570e2b81e98664f3ad6a127e57d +size 74016 diff --git a/checkpoints/checkpoint-135/chat_template.jinja b/checkpoints/checkpoint-135/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-135/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-135/optimizer.pt b/checkpoints/checkpoint-135/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47d3e9acbec2b5c083c2c0fbbeeefc049fccac1 --- /dev/null +++ b/checkpoints/checkpoint-135/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c8095d91e97ecc11e6a47da7d1e441edde4d9f83e0b14ae88d6076ac5d7365 +size 43813 diff --git a/checkpoints/checkpoint-135/rng_state.pth b/checkpoints/checkpoint-135/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-135/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-135/scheduler.pt b/checkpoints/checkpoint-135/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc3e87ce4ac439c129bde36262ecbe7d3a91e7c3 --- /dev/null +++ b/checkpoints/checkpoint-135/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f963eb37d5902b71db9d0c16e8a1b6f21387904de15ef68f7c88eebe11966bc +size 1465 diff --git a/checkpoints/checkpoint-135/special_tokens_map.json b/checkpoints/checkpoint-135/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-135/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-135/tokenizer.json b/checkpoints/checkpoint-135/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-135/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-135/tokenizer_config.json b/checkpoints/checkpoint-135/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-135/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-135/trainer_state.json b/checkpoints/checkpoint-135/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..739da0607711c8e87e81bb10a38128f356b67397 --- /dev/null +++ b/checkpoints/checkpoint-135/trainer_state.json @@ -0,0 +1,987 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.34047919293820933, + "eval_steps": 100, + "global_step": 135, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5131946602643456e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-135/training_args.bin b/checkpoints/checkpoint-135/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-135/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-140/README.md b/checkpoints/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-140/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-140/adapter_config.json b/checkpoints/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-140/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-140/adapter_model.safetensors b/checkpoints/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bed69d0a91ccf5be618265672571f3612e6abf7 --- /dev/null +++ b/checkpoints/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afa74bd9850bcf17997fc7abf6394ea151a05933a12c2613d83a27b430e7270 +size 74016 diff --git a/checkpoints/checkpoint-140/chat_template.jinja b/checkpoints/checkpoint-140/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-140/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-140/optimizer.pt b/checkpoints/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4fc91a04ff9273eb413d03dfef4f31db6b0b550 --- /dev/null +++ b/checkpoints/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3aaad2d26f956492aa3974c64e0d6c1d9de66e64d2aad5cec08898dc9d3b5ec +size 43813 diff --git a/checkpoints/checkpoint-140/rng_state.pth b/checkpoints/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-140/scheduler.pt b/checkpoints/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc41d46f496dd65e5dd0ad537ae72d20c6b1c24b --- /dev/null +++ b/checkpoints/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b1218e825b22bccb6f4fc5d47b9f4bc025a0fa0b82be351c5c0dde4212e1da3 +size 1465 diff --git a/checkpoints/checkpoint-140/special_tokens_map.json b/checkpoints/checkpoint-140/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-140/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-140/tokenizer.json b/checkpoints/checkpoint-140/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-140/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-140/tokenizer_config.json b/checkpoints/checkpoint-140/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-140/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-140/trainer_state.json b/checkpoints/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b653d77da4ea50132686ef6e5f022cd56bece02 --- /dev/null +++ b/checkpoints/checkpoint-140/trainer_state.json @@ -0,0 +1,1022 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3530895334174023, + "eval_steps": 100, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5701031428333568e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-140/training_args.bin b/checkpoints/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-145/README.md b/checkpoints/checkpoint-145/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-145/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-145/adapter_config.json b/checkpoints/checkpoint-145/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-145/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-145/adapter_model.safetensors b/checkpoints/checkpoint-145/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75afc669be0024f4219effac8b217eb5d1df5be9 --- /dev/null +++ b/checkpoints/checkpoint-145/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b81d9cd7d9a19ea6a1c6af4586efaa20be04dd6883991720052635e419368350 +size 74016 diff --git a/checkpoints/checkpoint-145/chat_template.jinja b/checkpoints/checkpoint-145/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-145/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-145/optimizer.pt b/checkpoints/checkpoint-145/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..395c6d72b4d4185148f3e814baa531332f46c160 --- /dev/null +++ b/checkpoints/checkpoint-145/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b4079f106a5f3942476745909b6f4185ba54645d7d78b53e04bec2102db28f +size 43813 diff --git a/checkpoints/checkpoint-145/rng_state.pth b/checkpoints/checkpoint-145/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-145/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-145/scheduler.pt b/checkpoints/checkpoint-145/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2a3ad7cf38ee5547cc3c86f44d055e81786c645 --- /dev/null +++ b/checkpoints/checkpoint-145/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d46c42e68d8c53812492854ef4e8240657c183ae0c12d5bffa699d7bdb2cd2 +size 1465 diff --git a/checkpoints/checkpoint-145/special_tokens_map.json b/checkpoints/checkpoint-145/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-145/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-145/tokenizer.json b/checkpoints/checkpoint-145/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-145/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-145/tokenizer_config.json b/checkpoints/checkpoint-145/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-145/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-145/trainer_state.json b/checkpoints/checkpoint-145/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b8862c4a7be429f7421dc92bba3056b04fb3c781 --- /dev/null +++ b/checkpoints/checkpoint-145/trainer_state.json @@ -0,0 +1,1057 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3656998738965952, + "eval_steps": 100, + "global_step": 145, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6269125601533952e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-145/training_args.bin b/checkpoints/checkpoint-145/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-145/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-15/README.md b/checkpoints/checkpoint-15/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-15/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-15/adapter_config.json b/checkpoints/checkpoint-15/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-15/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-15/adapter_model.safetensors b/checkpoints/checkpoint-15/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5e4d118371914d8a9d558de4ba4720e4f35e799 --- /dev/null +++ b/checkpoints/checkpoint-15/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814f30469d9f8a79174bdeb0c63e71b70bc09b1702bd8ed9b5e47a47ff959a27 +size 74016 diff --git a/checkpoints/checkpoint-15/chat_template.jinja b/checkpoints/checkpoint-15/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-15/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-15/optimizer.pt b/checkpoints/checkpoint-15/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ea8b6488070bed6e2fc2793e4318723f0a5999c --- /dev/null +++ b/checkpoints/checkpoint-15/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e8e3733716789b1f6e6608fed44d881cb0028e24716df68ce2c837e3dd56e09 +size 43813 diff --git a/checkpoints/checkpoint-15/rng_state.pth b/checkpoints/checkpoint-15/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-15/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-15/scheduler.pt b/checkpoints/checkpoint-15/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea0c757c22298c9949c912b5e8adbf809d8daa88 --- /dev/null +++ b/checkpoints/checkpoint-15/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8488a84e9ba82f6f3aa72ad8a2a7909bf86011bfa7830cc227df9703f59da3 +size 1465 diff --git a/checkpoints/checkpoint-15/special_tokens_map.json b/checkpoints/checkpoint-15/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-15/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-15/tokenizer.json b/checkpoints/checkpoint-15/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-15/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-15/tokenizer_config.json b/checkpoints/checkpoint-15/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-15/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-15/trainer_state.json b/checkpoints/checkpoint-15/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..24c6803db43832e2042b9d0345b32972abba5cbe --- /dev/null +++ b/checkpoints/checkpoint-15/trainer_state.json @@ -0,0 +1,139 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.03783102143757881, + "eval_steps": 100, + "global_step": 15, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1689873028841472.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-15/training_args.bin b/checkpoints/checkpoint-15/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-15/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-150/README.md b/checkpoints/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-150/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-150/adapter_config.json b/checkpoints/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-150/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-150/adapter_model.safetensors b/checkpoints/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..337f8e60cfb767b9f3016be9d6c851d20a2c3464 --- /dev/null +++ b/checkpoints/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e7ba70bccb9904ab98bc436086d938fc93051522e311db7575e92f3f4831cd +size 74016 diff --git a/checkpoints/checkpoint-150/chat_template.jinja b/checkpoints/checkpoint-150/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-150/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-150/optimizer.pt b/checkpoints/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4377659016986b56c2fba6065de5aa4c92ed640e --- /dev/null +++ b/checkpoints/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45274be80b96a42704563d23cfec1d73706cf17172f094d567bcf32bcc64cfe +size 43813 diff --git a/checkpoints/checkpoint-150/rng_state.pth b/checkpoints/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-150/scheduler.pt b/checkpoints/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac6e4fdc9db1809ce0fc85d3274486396f84b711 --- /dev/null +++ b/checkpoints/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1577eec1a6e0dcadc696543d7c691e8c0d5ad6de5714ef1d566b91186063677 +size 1465 diff --git a/checkpoints/checkpoint-150/special_tokens_map.json b/checkpoints/checkpoint-150/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-150/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-150/tokenizer.json b/checkpoints/checkpoint-150/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-150/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-150/tokenizer_config.json b/checkpoints/checkpoint-150/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-150/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-150/trainer_state.json b/checkpoints/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..12b829b16ad88bc4e48fda7f5d433549c16c73a3 --- /dev/null +++ b/checkpoints/checkpoint-150/trainer_state.json @@ -0,0 +1,1092 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.37831021437578816, + "eval_steps": 100, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.682578224144384e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-150/training_args.bin b/checkpoints/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-155/README.md b/checkpoints/checkpoint-155/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-155/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-155/adapter_config.json b/checkpoints/checkpoint-155/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-155/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-155/adapter_model.safetensors b/checkpoints/checkpoint-155/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e31067d37b39d4c08149dc0c70e22b7969c0898c --- /dev/null +++ b/checkpoints/checkpoint-155/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5bdfbe6963388d1f804e026eb914605a7ef8d9a33e54f8c9d717774c305a6f +size 74016 diff --git a/checkpoints/checkpoint-155/chat_template.jinja b/checkpoints/checkpoint-155/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-155/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-155/optimizer.pt b/checkpoints/checkpoint-155/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ce0a5d5341ddcb6bd5d1c9f7cf189f2c47cdd33 --- /dev/null +++ b/checkpoints/checkpoint-155/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff4ee4fccc21a433939577f3216ab0043f7e15a44ce316e49f6a8a40d54cdc5c +size 43813 diff --git a/checkpoints/checkpoint-155/rng_state.pth b/checkpoints/checkpoint-155/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-155/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-155/scheduler.pt b/checkpoints/checkpoint-155/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..587c6b978349cbeedafb92d8696c28c75e97ecb8 --- /dev/null +++ b/checkpoints/checkpoint-155/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737ad8b8349b8a30cf6d4098ab5ab8906c638598d30b34fa4faeee0d99d0217d +size 1465 diff --git a/checkpoints/checkpoint-155/special_tokens_map.json b/checkpoints/checkpoint-155/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-155/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-155/tokenizer.json b/checkpoints/checkpoint-155/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-155/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-155/tokenizer_config.json b/checkpoints/checkpoint-155/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-155/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-155/trainer_state.json b/checkpoints/checkpoint-155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ca621f537e9d9453ef316106772da78ea2aa4ac8 --- /dev/null +++ b/checkpoints/checkpoint-155/trainer_state.json @@ -0,0 +1,1127 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.39092055485498106, + "eval_steps": 100, + "global_step": 155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7397388728016896e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-155/training_args.bin b/checkpoints/checkpoint-155/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-155/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-160/README.md b/checkpoints/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-160/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-160/adapter_config.json b/checkpoints/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-160/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-160/adapter_model.safetensors b/checkpoints/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eba0e815f4316b800b357d110fcc0d991fb706ab --- /dev/null +++ b/checkpoints/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86ef1eec7b9f41facd7892df4c1f996e8e028a6dd47e30fc43764e483d57c21 +size 74016 diff --git a/checkpoints/checkpoint-160/chat_template.jinja b/checkpoints/checkpoint-160/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-160/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-160/optimizer.pt b/checkpoints/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7779bc64b8c1a7fb9aa55b5295594d5be727d324 --- /dev/null +++ b/checkpoints/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dbfbed49daf10747b786b3d1b7dbe94bc46b68f21336acb1c2f1d483b94f953 +size 43813 diff --git a/checkpoints/checkpoint-160/rng_state.pth b/checkpoints/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-160/scheduler.pt b/checkpoints/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..11d4c33a69ab2b72bb7ff8886933dc4ddcd195c5 --- /dev/null +++ b/checkpoints/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b6652fb5b5dff04fe4127b8c325c611b1eb8148abfc3663612a4e09ca8299d +size 1465 diff --git a/checkpoints/checkpoint-160/special_tokens_map.json b/checkpoints/checkpoint-160/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-160/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-160/tokenizer.json b/checkpoints/checkpoint-160/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-160/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-160/tokenizer_config.json b/checkpoints/checkpoint-160/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-160/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-160/trainer_state.json b/checkpoints/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f8b0d005da9e7089ff6b1be7ffde01b08890815 --- /dev/null +++ b/checkpoints/checkpoint-160/trainer_state.json @@ -0,0 +1,1162 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.403530895334174, + "eval_steps": 100, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7971156638203904e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-160/training_args.bin b/checkpoints/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-165/README.md b/checkpoints/checkpoint-165/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-165/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-165/adapter_config.json b/checkpoints/checkpoint-165/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-165/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-165/adapter_model.safetensors b/checkpoints/checkpoint-165/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c8af8993c4035550f5dff382a6faaf89dfca788 --- /dev/null +++ b/checkpoints/checkpoint-165/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8752029e7df4d48d52c608c20c12e161572361bfba86bc3863d6e771016f312 +size 74016 diff --git a/checkpoints/checkpoint-165/chat_template.jinja b/checkpoints/checkpoint-165/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-165/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-165/optimizer.pt b/checkpoints/checkpoint-165/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5baa9e08aee5a94af82d6789c73822b24b169c93 --- /dev/null +++ b/checkpoints/checkpoint-165/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090c91e08cc82298abe976b877a7e424d93792c0351f92667b3a6d8640677c16 +size 43813 diff --git a/checkpoints/checkpoint-165/rng_state.pth b/checkpoints/checkpoint-165/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-165/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-165/scheduler.pt b/checkpoints/checkpoint-165/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c10435b364d00e9b38656d58a55615fb9a382f9 --- /dev/null +++ b/checkpoints/checkpoint-165/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a2cd4b56b5437654da017893ed0fd7e1fe4d39b31f0568882cd09bbba6a8fc +size 1465 diff --git a/checkpoints/checkpoint-165/special_tokens_map.json b/checkpoints/checkpoint-165/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-165/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-165/tokenizer.json b/checkpoints/checkpoint-165/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-165/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-165/tokenizer_config.json b/checkpoints/checkpoint-165/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-165/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-165/trainer_state.json b/checkpoints/checkpoint-165/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..296234659c5e5c94678c962ff71911ca61f209c3 --- /dev/null +++ b/checkpoints/checkpoint-165/trainer_state.json @@ -0,0 +1,1197 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.416141235813367, + "eval_steps": 100, + "global_step": 165, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.851565527028531e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-165/training_args.bin b/checkpoints/checkpoint-165/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-165/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-170/README.md b/checkpoints/checkpoint-170/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-170/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-170/adapter_config.json b/checkpoints/checkpoint-170/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-170/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-170/adapter_model.safetensors b/checkpoints/checkpoint-170/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd05a938dc0d7efb509d731963d5c185cf2a0a5c --- /dev/null +++ b/checkpoints/checkpoint-170/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f977c58e1e5810376a84d75db1de657036a2f59839bbd79c7f61ceaf2627fd1 +size 74016 diff --git a/checkpoints/checkpoint-170/chat_template.jinja b/checkpoints/checkpoint-170/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-170/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-170/optimizer.pt b/checkpoints/checkpoint-170/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7671ee82fc88e88ff9b36b0f4f2f4f287673c3e --- /dev/null +++ b/checkpoints/checkpoint-170/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ebd72017b2c498634efeff7c1e3ee8a55a4f3d499de1ebd9ad524e517c664e +size 43813 diff --git a/checkpoints/checkpoint-170/rng_state.pth b/checkpoints/checkpoint-170/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-170/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-170/scheduler.pt b/checkpoints/checkpoint-170/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aef779fcd067bf2570c89eab1378929c850cc9d2 --- /dev/null +++ b/checkpoints/checkpoint-170/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f389aeb1721c25a07db709896a50e83edee3bd6414e4d22ad9a4e315cf219c2a +size 1465 diff --git a/checkpoints/checkpoint-170/special_tokens_map.json b/checkpoints/checkpoint-170/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-170/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-170/tokenizer.json b/checkpoints/checkpoint-170/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-170/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-170/tokenizer_config.json b/checkpoints/checkpoint-170/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-170/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-170/trainer_state.json b/checkpoints/checkpoint-170/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e60dd9ae83f365c203f28ffa86e3c98f565417c5 --- /dev/null +++ b/checkpoints/checkpoint-170/trainer_state.json @@ -0,0 +1,1232 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4287515762925599, + "eval_steps": 100, + "global_step": 170, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.9087351816175616e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-170/training_args.bin b/checkpoints/checkpoint-170/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-170/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-175/README.md b/checkpoints/checkpoint-175/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-175/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-175/adapter_config.json b/checkpoints/checkpoint-175/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-175/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-175/adapter_model.safetensors b/checkpoints/checkpoint-175/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44455d778ca810ea27677ba393d0a7d2fb804dcc --- /dev/null +++ b/checkpoints/checkpoint-175/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4abe9325d17197c5aa10b420deef3ddd998c0421f6e06e1698f757b7843f917 +size 74016 diff --git a/checkpoints/checkpoint-175/chat_template.jinja b/checkpoints/checkpoint-175/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-175/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-175/optimizer.pt b/checkpoints/checkpoint-175/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..07b38865847cc837faebb4561e72d102318850d2 --- /dev/null +++ b/checkpoints/checkpoint-175/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c1dc3546be999b94b226714a41e8a08ba1ed97f1b7773a28977238e57e64861 +size 43813 diff --git a/checkpoints/checkpoint-175/rng_state.pth b/checkpoints/checkpoint-175/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-175/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-175/scheduler.pt b/checkpoints/checkpoint-175/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e313c866e1361abff6a139247531cb6e9b8f9f5 --- /dev/null +++ b/checkpoints/checkpoint-175/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd667c62afb5ae84f9a633214cf760612f84947a66d123f9b2ade14f74aaaea +size 1465 diff --git a/checkpoints/checkpoint-175/special_tokens_map.json b/checkpoints/checkpoint-175/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-175/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-175/tokenizer.json b/checkpoints/checkpoint-175/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-175/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-175/tokenizer_config.json b/checkpoints/checkpoint-175/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-175/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-175/trainer_state.json b/checkpoints/checkpoint-175/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eb761e1edbf4bdc9f9263101bd2241b5d381ba46 --- /dev/null +++ b/checkpoints/checkpoint-175/trainer_state.json @@ -0,0 +1,1267 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.44136191677175285, + "eval_steps": 100, + "global_step": 175, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.965310444712755e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-175/training_args.bin b/checkpoints/checkpoint-175/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-175/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-180/README.md b/checkpoints/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-180/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-180/adapter_config.json b/checkpoints/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-180/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-180/adapter_model.safetensors b/checkpoints/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e174a875535bde31ea3ff65dec8c4e7631bad89d --- /dev/null +++ b/checkpoints/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc450718f1a0436fbade3a81092fd0431af61947e99ab4e85dd426949216bef +size 74016 diff --git a/checkpoints/checkpoint-180/chat_template.jinja b/checkpoints/checkpoint-180/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-180/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-180/optimizer.pt b/checkpoints/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b6bea381113e8a8d7ed45b58860e82e44b6d00c --- /dev/null +++ b/checkpoints/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5107c8e4aadee4eebd1d7aeeabadea4897d10bf610f04a5e27e7b5e6ed94f641 +size 43813 diff --git a/checkpoints/checkpoint-180/rng_state.pth b/checkpoints/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-180/scheduler.pt b/checkpoints/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0233fae2f0cff9a5bc0c61015b584670f8cf1dfe --- /dev/null +++ b/checkpoints/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8435f832c953ffd48b0dd8c7e72c1be53fea0841335092a2e03d8352ddaf8e +size 1465 diff --git a/checkpoints/checkpoint-180/special_tokens_map.json b/checkpoints/checkpoint-180/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-180/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-180/tokenizer.json b/checkpoints/checkpoint-180/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-180/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-180/tokenizer_config.json b/checkpoints/checkpoint-180/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-180/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-180/trainer_state.json b/checkpoints/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d8e1274cdace18e84d947183bdab503d8a550fbb --- /dev/null +++ b/checkpoints/checkpoint-180/trainer_state.json @@ -0,0 +1,1302 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.45397225725094575, + "eval_steps": 100, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.0223089865990144e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-180/training_args.bin b/checkpoints/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-185/README.md b/checkpoints/checkpoint-185/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-185/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-185/adapter_config.json b/checkpoints/checkpoint-185/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-185/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-185/adapter_model.safetensors b/checkpoints/checkpoint-185/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8be447df866f9c39bf1f16b9dd489e6e878d736 --- /dev/null +++ b/checkpoints/checkpoint-185/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055c24d3714588833342f3f00c6ed6f6662857c8c5785c42e617dc79fe0d1278 +size 74016 diff --git a/checkpoints/checkpoint-185/chat_template.jinja b/checkpoints/checkpoint-185/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-185/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-185/optimizer.pt b/checkpoints/checkpoint-185/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8387611bcf66b6b4989298b84a5f19652d4b633 --- /dev/null +++ b/checkpoints/checkpoint-185/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc8446cd395cc67c3e5c6862687a7ad68aad48d806ab31bb255c4a44cdee46a6 +size 43813 diff --git a/checkpoints/checkpoint-185/rng_state.pth b/checkpoints/checkpoint-185/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-185/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-185/scheduler.pt b/checkpoints/checkpoint-185/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2117742e682b9ef94bf7928fab1f85859494e54 --- /dev/null +++ b/checkpoints/checkpoint-185/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0176ae044eee5e53e20dc1b5a779de96510f69506ca3092c7abd5656ede1df9 +size 1465 diff --git a/checkpoints/checkpoint-185/special_tokens_map.json b/checkpoints/checkpoint-185/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-185/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-185/tokenizer.json b/checkpoints/checkpoint-185/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-185/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-185/tokenizer_config.json b/checkpoints/checkpoint-185/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-185/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-185/trainer_state.json b/checkpoints/checkpoint-185/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5f7eff7c7bd62e00056bfd0349d8e76f6996c88f --- /dev/null +++ b/checkpoints/checkpoint-185/trainer_state.json @@ -0,0 +1,1337 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4665825977301387, + "eval_steps": 100, + "global_step": 185, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.078334887858995e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-185/training_args.bin b/checkpoints/checkpoint-185/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-185/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-190/README.md b/checkpoints/checkpoint-190/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-190/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-190/adapter_config.json b/checkpoints/checkpoint-190/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-190/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-190/adapter_model.safetensors b/checkpoints/checkpoint-190/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4c021882b1859e31630fd4897d950ba1df8bdd7 --- /dev/null +++ b/checkpoints/checkpoint-190/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9c3b2f01e9349abdbed3c35fdd066ed83e9c1665f699158cc34b91ee89bd50 +size 74016 diff --git a/checkpoints/checkpoint-190/chat_template.jinja b/checkpoints/checkpoint-190/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-190/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-190/optimizer.pt b/checkpoints/checkpoint-190/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4962ab48c48fed277e530842293496f9e384eecf --- /dev/null +++ b/checkpoints/checkpoint-190/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875e5b054ac9315d1a3963fe22f0298161c0eff0a4a9a5018a41f6a109f91b7a +size 43813 diff --git a/checkpoints/checkpoint-190/rng_state.pth b/checkpoints/checkpoint-190/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-190/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-190/scheduler.pt b/checkpoints/checkpoint-190/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a63cd71a12f1704f4cf910b01dc4d0ccac9e3e67 --- /dev/null +++ b/checkpoints/checkpoint-190/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d20bfee158d956b1a68ab8913dc87d6e67c838b80c163a0e813f60abce40427 +size 1465 diff --git a/checkpoints/checkpoint-190/special_tokens_map.json b/checkpoints/checkpoint-190/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-190/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-190/tokenizer.json b/checkpoints/checkpoint-190/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-190/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-190/tokenizer_config.json b/checkpoints/checkpoint-190/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-190/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-190/trainer_state.json b/checkpoints/checkpoint-190/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c4ce9ca0f470a36f1d5e069295115f261a820d02 --- /dev/null +++ b/checkpoints/checkpoint-190/trainer_state.json @@ -0,0 +1,1372 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4791929382093317, + "eval_steps": 100, + "global_step": 190, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1351533111107584e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-190/training_args.bin b/checkpoints/checkpoint-190/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-190/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-195/README.md b/checkpoints/checkpoint-195/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-195/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-195/adapter_config.json b/checkpoints/checkpoint-195/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-195/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-195/adapter_model.safetensors b/checkpoints/checkpoint-195/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdfe27780d6be2f262dd6c9e71acf82d1d6097a8 --- /dev/null +++ b/checkpoints/checkpoint-195/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eedb6a8cc2f92a1bd663ddaff4c34f860da496a2ad50b17105b5a23a6fc2a533 +size 74016 diff --git a/checkpoints/checkpoint-195/chat_template.jinja b/checkpoints/checkpoint-195/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-195/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-195/optimizer.pt b/checkpoints/checkpoint-195/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bb341f4cc3d7adb8732e24fcfddbf23cbb2aac9 --- /dev/null +++ b/checkpoints/checkpoint-195/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0173a5cd372c5d5a81f7c50bb620839249b9bc1e12999857907250e642520b88 +size 43813 diff --git a/checkpoints/checkpoint-195/rng_state.pth b/checkpoints/checkpoint-195/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d681a34283ea87d8ee31827a25caa7feffbf71 --- /dev/null +++ b/checkpoints/checkpoint-195/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde881b24af86d8870de73e5328bf8754c9e262c58bcd875cf451eca0e6671d5 +size 14581 diff --git a/checkpoints/checkpoint-195/scheduler.pt b/checkpoints/checkpoint-195/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc8f46bffd97982e6f1540ec514157b5f7a95c45 --- /dev/null +++ b/checkpoints/checkpoint-195/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e7f39669f178193f4d25711573639673d183357a06bf683136252b9d8c1331 +size 1465 diff --git a/checkpoints/checkpoint-195/special_tokens_map.json b/checkpoints/checkpoint-195/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-195/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-195/tokenizer.json b/checkpoints/checkpoint-195/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-195/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-195/tokenizer_config.json b/checkpoints/checkpoint-195/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-195/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-195/trainer_state.json b/checkpoints/checkpoint-195/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0b08a15e5dcde75eecb5fef76a188aeadcaee18 --- /dev/null +++ b/checkpoints/checkpoint-195/trainer_state.json @@ -0,0 +1,1407 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4918032786885246, + "eval_steps": 100, + "global_step": 195, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1899994353147904e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-195/training_args.bin b/checkpoints/checkpoint-195/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-195/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-20/README.md b/checkpoints/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-20/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-20/adapter_config.json b/checkpoints/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-20/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-20/adapter_model.safetensors b/checkpoints/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bd0431f73791928dd41c9483535dc8132857b3e --- /dev/null +++ b/checkpoints/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d4a56a572eac25c95c720a40a189a4634119dbeade16e78f1dc42f570720141 +size 74016 diff --git a/checkpoints/checkpoint-20/chat_template.jinja b/checkpoints/checkpoint-20/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-20/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-20/optimizer.pt b/checkpoints/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28a13236269cd17df7b8becc22ebfc2bc1369f7d --- /dev/null +++ b/checkpoints/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5fca1e499bf2f0dd7b5abaddd729aa6b9b3b964a2623408a96865b560acd785 +size 43813 diff --git a/checkpoints/checkpoint-20/rng_state.pth b/checkpoints/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-20/scheduler.pt b/checkpoints/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..53ca2af2a2ea224ddb017520a85c7ae74781d248 --- /dev/null +++ b/checkpoints/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b67669993b1be8afc318a211bb6d47e3c53c613107b1ad38bf27354e307377 +size 1465 diff --git a/checkpoints/checkpoint-20/special_tokens_map.json b/checkpoints/checkpoint-20/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-20/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-20/tokenizer.json b/checkpoints/checkpoint-20/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-20/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-20/tokenizer_config.json b/checkpoints/checkpoint-20/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-20/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-20/trainer_state.json b/checkpoints/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7543ea91aa60a44d5c95f946a229213e4c86a359 --- /dev/null +++ b/checkpoints/checkpoint-20/trainer_state.json @@ -0,0 +1,174 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.05044136191677175, + "eval_steps": 100, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2248691092365312.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-20/training_args.bin b/checkpoints/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-200/README.md b/checkpoints/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-200/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-200/adapter_config.json b/checkpoints/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-200/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-200/adapter_model.safetensors b/checkpoints/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2322ba8188994b1e31eacde50a6497a9be2411c --- /dev/null +++ b/checkpoints/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd04e5b759bce098acea29ebc3694d59ac46fa102090f110b664fad91d6aed67 +size 74016 diff --git a/checkpoints/checkpoint-200/chat_template.jinja b/checkpoints/checkpoint-200/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-200/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-200/optimizer.pt b/checkpoints/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0694361f64ac4d779c102b7481a0e2420be6cbe --- /dev/null +++ b/checkpoints/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:873777626f9e2c8f5a9ce583f4ca6070fac7131978eccc6a232f6b6a0f170d12 +size 43813 diff --git a/checkpoints/checkpoint-200/rng_state.pth b/checkpoints/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-200/scheduler.pt b/checkpoints/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c24af522d593c6cbc8037581398b3e0c4d6a598 --- /dev/null +++ b/checkpoints/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7becff616ab4b426271452da885f09d6d39a1541d1ce4cd1a32088b9132ca59c +size 1465 diff --git a/checkpoints/checkpoint-200/special_tokens_map.json b/checkpoints/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-200/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-200/tokenizer.json b/checkpoints/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-200/tokenizer_config.json b/checkpoints/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-200/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-200/trainer_state.json b/checkpoints/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e6b345d548e92847673f37e4c9456a7312bd1270 --- /dev/null +++ b/checkpoints/checkpoint-200/trainer_state.json @@ -0,0 +1,1450 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5044136191677175, + "eval_steps": 100, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.248501967799091e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-200/training_args.bin b/checkpoints/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-205/README.md b/checkpoints/checkpoint-205/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-205/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-205/adapter_config.json b/checkpoints/checkpoint-205/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-205/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-205/adapter_model.safetensors b/checkpoints/checkpoint-205/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..987ce0a4deaeafd744265f12e173cd9e3178902d --- /dev/null +++ b/checkpoints/checkpoint-205/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e44d45f9ae929e99dc29fe627266259b78b7206e29d70b9dd995c8739250d35 +size 74016 diff --git a/checkpoints/checkpoint-205/chat_template.jinja b/checkpoints/checkpoint-205/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-205/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-205/optimizer.pt b/checkpoints/checkpoint-205/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b41d578dfabc6ce98dd7807d80ee67f0d1a5c999 --- /dev/null +++ b/checkpoints/checkpoint-205/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af86114033cbc004937b678467f0680b4cab2487c9a9c7a44b5d21cb91d5db3b +size 43813 diff --git a/checkpoints/checkpoint-205/rng_state.pth b/checkpoints/checkpoint-205/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-205/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-205/scheduler.pt b/checkpoints/checkpoint-205/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7d04376d05b5925c27a5e3e8d476b65c5f2ad0e --- /dev/null +++ b/checkpoints/checkpoint-205/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8a36983b4cfc653ca5c48dc1c5118ba0976ec38fdf6883b3449710fce4f970 +size 1465 diff --git a/checkpoints/checkpoint-205/special_tokens_map.json b/checkpoints/checkpoint-205/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-205/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-205/tokenizer.json b/checkpoints/checkpoint-205/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-205/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-205/tokenizer_config.json b/checkpoints/checkpoint-205/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-205/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-205/trainer_state.json b/checkpoints/checkpoint-205/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2a6ae816e7facf37360dabe8597c50b4a4bd176f --- /dev/null +++ b/checkpoints/checkpoint-205/trainer_state.json @@ -0,0 +1,1485 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5170239596469105, + "eval_steps": 100, + "global_step": 205, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.3026636411920384e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-205/training_args.bin b/checkpoints/checkpoint-205/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-205/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-210/README.md b/checkpoints/checkpoint-210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-210/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-210/adapter_config.json b/checkpoints/checkpoint-210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-210/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-210/adapter_model.safetensors b/checkpoints/checkpoint-210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c42dbad4cda6011e595a414676beb1b2556f805c --- /dev/null +++ b/checkpoints/checkpoint-210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d4738def87e5c755fee6ce5b8f5beb3e7ab6ba69d96324a7719842b7566351 +size 74016 diff --git a/checkpoints/checkpoint-210/chat_template.jinja b/checkpoints/checkpoint-210/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-210/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-210/optimizer.pt b/checkpoints/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9dd737e389cbd6968d77aafa96842a9381e12610 --- /dev/null +++ b/checkpoints/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54242b566ec9982a282c50d04c8bf0cdf2cd718b538527ec193e3edec9255d49 +size 43813 diff --git a/checkpoints/checkpoint-210/rng_state.pth b/checkpoints/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-210/scheduler.pt b/checkpoints/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f39a5795e787262dd7b4edaad5fcf643816704a --- /dev/null +++ b/checkpoints/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:279452e1128e4d9eddb9319134b9845f5fcdf0aea9147ef618f84ff3f22cc933 +size 1465 diff --git a/checkpoints/checkpoint-210/special_tokens_map.json b/checkpoints/checkpoint-210/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-210/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-210/tokenizer.json b/checkpoints/checkpoint-210/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-210/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-210/tokenizer_config.json b/checkpoints/checkpoint-210/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-210/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-210/trainer_state.json b/checkpoints/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9671de80915f21d41086d35cdd8e7688ba094c10 --- /dev/null +++ b/checkpoints/checkpoint-210/trainer_state.json @@ -0,0 +1,1520 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5296343001261034, + "eval_steps": 100, + "global_step": 210, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.359599141556224e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-210/training_args.bin b/checkpoints/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-215/README.md b/checkpoints/checkpoint-215/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-215/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-215/adapter_config.json b/checkpoints/checkpoint-215/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-215/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-215/adapter_model.safetensors b/checkpoints/checkpoint-215/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb499b4c534eef042b2808a552f7dfe5bc0c5afa --- /dev/null +++ b/checkpoints/checkpoint-215/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81f5564a9d05abab5e93f78dffc7510c86ca86e2f4e73e2d78c8075ca909f23 +size 74016 diff --git a/checkpoints/checkpoint-215/chat_template.jinja b/checkpoints/checkpoint-215/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-215/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-215/optimizer.pt b/checkpoints/checkpoint-215/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..15f463c813593d2406da84acf5c230e9cbe1d4c7 --- /dev/null +++ b/checkpoints/checkpoint-215/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6866f67b012845d631b2218467e972df92b24b1892197dabc1787b5ba5bdec9b +size 43813 diff --git a/checkpoints/checkpoint-215/rng_state.pth b/checkpoints/checkpoint-215/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-215/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-215/scheduler.pt b/checkpoints/checkpoint-215/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0acaf4a226dbeb1ffb13e128b4004e576e389b80 --- /dev/null +++ b/checkpoints/checkpoint-215/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dbd753966a17675ba6d772f710fd0b694ca8e3427be5078282c368b7ac8d867 +size 1465 diff --git a/checkpoints/checkpoint-215/special_tokens_map.json b/checkpoints/checkpoint-215/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-215/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-215/tokenizer.json b/checkpoints/checkpoint-215/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-215/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-215/tokenizer_config.json b/checkpoints/checkpoint-215/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-215/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-215/trainer_state.json b/checkpoints/checkpoint-215/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ff2d5cc8645690ff47037ca9148d250eaf324690 --- /dev/null +++ b/checkpoints/checkpoint-215/trainer_state.json @@ -0,0 +1,1555 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5422446406052963, + "eval_steps": 100, + "global_step": 215, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.415525977567232e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-215/training_args.bin b/checkpoints/checkpoint-215/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-215/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-220/README.md b/checkpoints/checkpoint-220/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-220/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-220/adapter_config.json b/checkpoints/checkpoint-220/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-220/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-220/adapter_model.safetensors b/checkpoints/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae63bfb7232c0853f56af739a3f4b8ddf86d9142 --- /dev/null +++ b/checkpoints/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5ef62f16fd6ebc3e973c1575a6d3a18fd9c0df8aec689860440a9adec4ce25 +size 74016 diff --git a/checkpoints/checkpoint-220/chat_template.jinja b/checkpoints/checkpoint-220/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-220/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-220/optimizer.pt b/checkpoints/checkpoint-220/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..548f41144124de88d975ee5835627048ba62f673 --- /dev/null +++ b/checkpoints/checkpoint-220/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13dc042a9ff777d283d2bec10cfe96f51073e6dd94062c53056f3908ccf7d833 +size 43813 diff --git a/checkpoints/checkpoint-220/rng_state.pth b/checkpoints/checkpoint-220/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-220/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-220/scheduler.pt b/checkpoints/checkpoint-220/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0e0145a858ffa0aeb0a7e10d491cde2e21957e8 --- /dev/null +++ b/checkpoints/checkpoint-220/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e842741dab5832f4a8b220e4e011f56188ace922943304e25b637be69a9edd +size 1465 diff --git a/checkpoints/checkpoint-220/special_tokens_map.json b/checkpoints/checkpoint-220/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-220/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-220/tokenizer.json b/checkpoints/checkpoint-220/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-220/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-220/tokenizer_config.json b/checkpoints/checkpoint-220/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-220/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-220/trainer_state.json b/checkpoints/checkpoint-220/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dcfd98445ba7639414180b0ef903570fbfbbf678 --- /dev/null +++ b/checkpoints/checkpoint-220/trainer_state.json @@ -0,0 +1,1590 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5548549810844893, + "eval_steps": 100, + "global_step": 220, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4728667448590336e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-220/training_args.bin b/checkpoints/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-225/README.md b/checkpoints/checkpoint-225/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-225/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-225/adapter_config.json b/checkpoints/checkpoint-225/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-225/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-225/adapter_model.safetensors b/checkpoints/checkpoint-225/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cef3d2db467f07af2d50f2196a829df6df8044c --- /dev/null +++ b/checkpoints/checkpoint-225/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7819b01fc882f6b63dd6d93cbd85415b8dfaf0d0bf5e73b834219114d9fade6f +size 74016 diff --git a/checkpoints/checkpoint-225/chat_template.jinja b/checkpoints/checkpoint-225/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-225/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-225/optimizer.pt b/checkpoints/checkpoint-225/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c9cd3234597a340c5749e34a88f3d86271d6a32 --- /dev/null +++ b/checkpoints/checkpoint-225/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:835ffb7e791eb1768b02ca664ef1d8e6c8211033cc09a829622c6f354eb6d32c +size 43813 diff --git a/checkpoints/checkpoint-225/rng_state.pth b/checkpoints/checkpoint-225/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-225/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-225/scheduler.pt b/checkpoints/checkpoint-225/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fda4ea373ee78b5c729dd9df8198edd8153668f3 --- /dev/null +++ b/checkpoints/checkpoint-225/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243e6c8506e34afe17b0cc47fca64d468ec42669833d9a02467cb78bbeafa0ef +size 1465 diff --git a/checkpoints/checkpoint-225/special_tokens_map.json b/checkpoints/checkpoint-225/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-225/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-225/tokenizer.json b/checkpoints/checkpoint-225/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-225/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-225/tokenizer_config.json b/checkpoints/checkpoint-225/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-225/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-225/trainer_state.json b/checkpoints/checkpoint-225/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e20ee3bbd8d953b0da8ffed934dbbc00530182bb --- /dev/null +++ b/checkpoints/checkpoint-225/trainer_state.json @@ -0,0 +1,1625 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5674653215636822, + "eval_steps": 100, + "global_step": 225, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.531027051937792e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-225/training_args.bin b/checkpoints/checkpoint-225/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-225/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-230/README.md b/checkpoints/checkpoint-230/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-230/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-230/adapter_config.json b/checkpoints/checkpoint-230/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-230/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-230/adapter_model.safetensors b/checkpoints/checkpoint-230/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e29672e8905e2e2f60678b46a08e2047591db90 --- /dev/null +++ b/checkpoints/checkpoint-230/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6e0ea7bd051792b7340d7a803a350055a2aa9659fe007d22315dad560ef095 +size 74016 diff --git a/checkpoints/checkpoint-230/chat_template.jinja b/checkpoints/checkpoint-230/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-230/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-230/optimizer.pt b/checkpoints/checkpoint-230/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e109625fbaa5db7e5f2b8c2b99eec49945b5f04 --- /dev/null +++ b/checkpoints/checkpoint-230/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822ebf852408af1a619ea0f78c15803966a501b5921be59236368698d3cce27f +size 43813 diff --git a/checkpoints/checkpoint-230/rng_state.pth b/checkpoints/checkpoint-230/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-230/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-230/scheduler.pt b/checkpoints/checkpoint-230/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fcda74b603ccf71e0530ad23732f2ef2df25b56 --- /dev/null +++ b/checkpoints/checkpoint-230/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92ba6253aa1eb868e40fddf00971250b381b6b0d17eeb5eb3e368e34112796bb +size 1465 diff --git a/checkpoints/checkpoint-230/special_tokens_map.json b/checkpoints/checkpoint-230/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-230/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-230/tokenizer.json b/checkpoints/checkpoint-230/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-230/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-230/tokenizer_config.json b/checkpoints/checkpoint-230/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-230/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-230/trainer_state.json b/checkpoints/checkpoint-230/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..82d6426fb9d2c0ce4919d6530824f93a32bb5362 --- /dev/null +++ b/checkpoints/checkpoint-230/trainer_state.json @@ -0,0 +1,1660 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5800756620428752, + "eval_steps": 100, + "global_step": 230, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.588124659073024e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-230/training_args.bin b/checkpoints/checkpoint-230/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-230/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-235/README.md b/checkpoints/checkpoint-235/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-235/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-235/adapter_config.json b/checkpoints/checkpoint-235/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-235/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-235/adapter_model.safetensors b/checkpoints/checkpoint-235/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77f5de1ba26d9a4e61ba06021fe725141e00362c --- /dev/null +++ b/checkpoints/checkpoint-235/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86eef549f2e97a745c8491b971047cceb03a3317c80f263a3acb7df84d182841 +size 74016 diff --git a/checkpoints/checkpoint-235/chat_template.jinja b/checkpoints/checkpoint-235/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-235/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-235/optimizer.pt b/checkpoints/checkpoint-235/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdc376329a15d0d368968085cc45db6d55b8c52a --- /dev/null +++ b/checkpoints/checkpoint-235/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ed9ab67ea54c2a1e3751d42a95723f166bb230a7cc0484131e45b0ef67d6af +size 43813 diff --git a/checkpoints/checkpoint-235/rng_state.pth b/checkpoints/checkpoint-235/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-235/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-235/scheduler.pt b/checkpoints/checkpoint-235/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c090faa747d5bef778ffd62a3e1aceeb6634208 --- /dev/null +++ b/checkpoints/checkpoint-235/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b76daea639b966d6c05bf1f79f825e98a0c5c60f1e64f5e3ef7971da8637b5 +size 1465 diff --git a/checkpoints/checkpoint-235/special_tokens_map.json b/checkpoints/checkpoint-235/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-235/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-235/tokenizer.json b/checkpoints/checkpoint-235/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-235/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-235/tokenizer_config.json b/checkpoints/checkpoint-235/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-235/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-235/trainer_state.json b/checkpoints/checkpoint-235/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..71bda946f5469382018702b817dfe09b29fa78de --- /dev/null +++ b/checkpoints/checkpoint-235/trainer_state.json @@ -0,0 +1,1695 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.592686002522068, + "eval_steps": 100, + "global_step": 235, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.645618527204147e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-235/training_args.bin b/checkpoints/checkpoint-235/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-235/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-240/README.md b/checkpoints/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-240/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-240/adapter_config.json b/checkpoints/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-240/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-240/adapter_model.safetensors b/checkpoints/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3f76cb2e990e9ab11d8cb2b6c161d40edf04279 --- /dev/null +++ b/checkpoints/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9e5ad7ebf3bd9a15364546a1a44c929a41fded63a78f1b3067e7261674419b +size 74016 diff --git a/checkpoints/checkpoint-240/chat_template.jinja b/checkpoints/checkpoint-240/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-240/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-240/optimizer.pt b/checkpoints/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..21f38565901fd0814a75511bfbd1c3959fdf59d5 --- /dev/null +++ b/checkpoints/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe1fd37d6a34649205302d80a79d134f2dbd4589a1d379bd30db8b42111e4795 +size 43813 diff --git a/checkpoints/checkpoint-240/rng_state.pth b/checkpoints/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-240/scheduler.pt b/checkpoints/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b453ebb7919681d9420ce2f09571bc24b64acabb --- /dev/null +++ b/checkpoints/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c3ddf88294d43f57a6768468ea07b58af47f33321a2bbc8e26dc0c097bdcbc +size 1465 diff --git a/checkpoints/checkpoint-240/special_tokens_map.json b/checkpoints/checkpoint-240/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-240/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-240/tokenizer.json b/checkpoints/checkpoint-240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-240/tokenizer_config.json b/checkpoints/checkpoint-240/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-240/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-240/trainer_state.json b/checkpoints/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..df90c81f191376989c7d50abf6e8c5a336b9f6ab --- /dev/null +++ b/checkpoints/checkpoint-240/trainer_state.json @@ -0,0 +1,1730 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.605296343001261, + "eval_steps": 100, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.7016174106689536e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-240/training_args.bin b/checkpoints/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-245/README.md b/checkpoints/checkpoint-245/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-245/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-245/adapter_config.json b/checkpoints/checkpoint-245/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-245/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-245/adapter_model.safetensors b/checkpoints/checkpoint-245/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24fd83b463688c80feec52044a238b5b2f9625c9 --- /dev/null +++ b/checkpoints/checkpoint-245/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bad11c1cc2b3fe666496665ca6fcf1fd865b115deef671e423a7535ecd9c74 +size 74016 diff --git a/checkpoints/checkpoint-245/chat_template.jinja b/checkpoints/checkpoint-245/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-245/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-245/optimizer.pt b/checkpoints/checkpoint-245/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3205a72ee9cf95863d474cfb586aeb41a16139d --- /dev/null +++ b/checkpoints/checkpoint-245/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a7cf2c497fafe3ff5de6b239f057c9a4a35e5472130d80e69a85d1e932737b +size 43813 diff --git a/checkpoints/checkpoint-245/rng_state.pth b/checkpoints/checkpoint-245/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-245/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-245/scheduler.pt b/checkpoints/checkpoint-245/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3df27ccde6389cccadb24e52f363d280b0630a07 --- /dev/null +++ b/checkpoints/checkpoint-245/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea8f791bc415a24b30864ceb94770ccff2fa5cd82486fd2e10ff53fd2678123 +size 1465 diff --git a/checkpoints/checkpoint-245/special_tokens_map.json b/checkpoints/checkpoint-245/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-245/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-245/tokenizer.json b/checkpoints/checkpoint-245/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-245/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-245/tokenizer_config.json b/checkpoints/checkpoint-245/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-245/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-245/trainer_state.json b/checkpoints/checkpoint-245/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f4f10ef946a7b6fb93c614c34a7e296bcb6dcd4b --- /dev/null +++ b/checkpoints/checkpoint-245/trainer_state.json @@ -0,0 +1,1765 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.617906683480454, + "eval_steps": 100, + "global_step": 245, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.758246709354496e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-245/training_args.bin b/checkpoints/checkpoint-245/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-245/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-25/README.md b/checkpoints/checkpoint-25/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-25/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-25/adapter_config.json b/checkpoints/checkpoint-25/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-25/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-25/adapter_model.safetensors b/checkpoints/checkpoint-25/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f8382967d2993de6fba52bc78086270b393954b --- /dev/null +++ b/checkpoints/checkpoint-25/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:671da4f91557f73577f85942ddc00a5ef12ae945324df1b6a8094882d3e2917b +size 74016 diff --git a/checkpoints/checkpoint-25/chat_template.jinja b/checkpoints/checkpoint-25/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-25/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-25/optimizer.pt b/checkpoints/checkpoint-25/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d28d1d2308a2fc7c0762eab17bf7c75c41c2631 --- /dev/null +++ b/checkpoints/checkpoint-25/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a742e84ac1386ddbbcbd12580ac72aedc0fa25164573b9cd231ba92cba7668 +size 43813 diff --git a/checkpoints/checkpoint-25/rng_state.pth b/checkpoints/checkpoint-25/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-25/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-25/scheduler.pt b/checkpoints/checkpoint-25/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a30065458a8a5aedce85fcffb5298124119486c --- /dev/null +++ b/checkpoints/checkpoint-25/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dc9ace6019a2b9728c20346a2554b460d882f9d39169278ffe31b42fc0abd5 +size 1465 diff --git a/checkpoints/checkpoint-25/special_tokens_map.json b/checkpoints/checkpoint-25/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-25/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-25/tokenizer.json b/checkpoints/checkpoint-25/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-25/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-25/tokenizer_config.json b/checkpoints/checkpoint-25/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-25/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-25/trainer_state.json b/checkpoints/checkpoint-25/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a3917bd0e8fbac606bf3bb3192dbb7dff97f505 --- /dev/null +++ b/checkpoints/checkpoint-25/trainer_state.json @@ -0,0 +1,209 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.06305170239596469, + "eval_steps": 100, + "global_step": 25, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2805978147495936.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-25/training_args.bin b/checkpoints/checkpoint-25/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-25/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-250/README.md b/checkpoints/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-250/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-250/adapter_config.json b/checkpoints/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-250/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-250/adapter_model.safetensors b/checkpoints/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..079c97d66390d566a7b8e28de7d8b45f4e9c7e7b --- /dev/null +++ b/checkpoints/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92401c586f9bd070d2eb4eee733e39d9b13a7556cfeb9eb23837b45fcef64eb +size 74016 diff --git a/checkpoints/checkpoint-250/chat_template.jinja b/checkpoints/checkpoint-250/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-250/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-250/optimizer.pt b/checkpoints/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..82314d2c2629a9b0626388c8313938a2f6a57176 --- /dev/null +++ b/checkpoints/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b997c95445e2753d332eac4a124435647960762f9146bb9fd6760f8823cb2242 +size 43813 diff --git a/checkpoints/checkpoint-250/rng_state.pth b/checkpoints/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-250/scheduler.pt b/checkpoints/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5774913c84b88ce675c31ba8bdd32480bc6a99c8 --- /dev/null +++ b/checkpoints/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbf2f9e120fc08853a4c1b6bafbdb5b9efe6409c69b08cd72378e1544d14e68 +size 1465 diff --git a/checkpoints/checkpoint-250/special_tokens_map.json b/checkpoints/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-250/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-250/tokenizer.json b/checkpoints/checkpoint-250/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-250/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-250/tokenizer_config.json b/checkpoints/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-250/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-250/trainer_state.json b/checkpoints/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b166261b4e77eb326800aa0590c17330c8968c2b --- /dev/null +++ b/checkpoints/checkpoint-250/trainer_state.json @@ -0,0 +1,1800 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6305170239596469, + "eval_steps": 100, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.814029450457907e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-250/training_args.bin b/checkpoints/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-255/README.md b/checkpoints/checkpoint-255/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-255/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-255/adapter_config.json b/checkpoints/checkpoint-255/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-255/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-255/adapter_model.safetensors b/checkpoints/checkpoint-255/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..093552d3a961cfc289082a21cb90e858e678a601 --- /dev/null +++ b/checkpoints/checkpoint-255/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a824e52fc095286926a70e2ba52ab4aa096a877ca6df312b1c3d60db0dcd752f +size 74016 diff --git a/checkpoints/checkpoint-255/chat_template.jinja b/checkpoints/checkpoint-255/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-255/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-255/optimizer.pt b/checkpoints/checkpoint-255/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c691aef2ca584d9564ed5c3c170bd382275255c --- /dev/null +++ b/checkpoints/checkpoint-255/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1657998f1e939efb9ae4d1d8968912ddad9652a591ee1d4a5105e2ff463209 +size 43813 diff --git a/checkpoints/checkpoint-255/rng_state.pth b/checkpoints/checkpoint-255/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-255/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-255/scheduler.pt b/checkpoints/checkpoint-255/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e35a7d5e80e6bff60411ab76e4d73e19bb490143 --- /dev/null +++ b/checkpoints/checkpoint-255/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974ee8e367ecb0fd5f84fd49ea86236c93403d8d1b54f8e5d67f782e1adaf6a6 +size 1465 diff --git a/checkpoints/checkpoint-255/special_tokens_map.json b/checkpoints/checkpoint-255/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-255/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-255/tokenizer.json b/checkpoints/checkpoint-255/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-255/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-255/tokenizer_config.json b/checkpoints/checkpoint-255/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-255/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-255/trainer_state.json b/checkpoints/checkpoint-255/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..124eea06c89771915be6b37dade1dc50dc955ce9 --- /dev/null +++ b/checkpoints/checkpoint-255/trainer_state.json @@ -0,0 +1,1835 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6431273644388399, + "eval_steps": 100, + "global_step": 255, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.86996529240064e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-255/training_args.bin b/checkpoints/checkpoint-255/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-255/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-260/README.md b/checkpoints/checkpoint-260/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-260/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-260/adapter_config.json b/checkpoints/checkpoint-260/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-260/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-260/adapter_model.safetensors b/checkpoints/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf3cb0086b8b63fd657d7c0c1475dcae5a8aa1c7 --- /dev/null +++ b/checkpoints/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dd3a444a995859ff96ce221f9193b8912e050dfe1dbb3fb04073276facaa44 +size 74016 diff --git a/checkpoints/checkpoint-260/chat_template.jinja b/checkpoints/checkpoint-260/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-260/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-260/optimizer.pt b/checkpoints/checkpoint-260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..503e41a3f0e7ba30619b2187a9b8bb712c472a52 --- /dev/null +++ b/checkpoints/checkpoint-260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9316b4c93f13499b40e35ba383dd3386152e28a684bc6f7a73106482753767b +size 43813 diff --git a/checkpoints/checkpoint-260/rng_state.pth b/checkpoints/checkpoint-260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-260/scheduler.pt b/checkpoints/checkpoint-260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..776542df0e4a69c6cdf6ff3ffd6e58fdd9a6e4fc --- /dev/null +++ b/checkpoints/checkpoint-260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7977c50bbd96698bc74fb946fa8c04a9b3416f87cf2b5c362d393edfa17af4d8 +size 1465 diff --git a/checkpoints/checkpoint-260/special_tokens_map.json b/checkpoints/checkpoint-260/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-260/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-260/tokenizer.json b/checkpoints/checkpoint-260/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-260/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-260/tokenizer_config.json b/checkpoints/checkpoint-260/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-260/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-260/trainer_state.json b/checkpoints/checkpoint-260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dfd82bc3e0870887df7b9a0ee21622b92cd7cdcf --- /dev/null +++ b/checkpoints/checkpoint-260/trainer_state.json @@ -0,0 +1,1870 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6557377049180327, + "eval_steps": 100, + "global_step": 260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.927161964784845e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-260/training_args.bin b/checkpoints/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-265/README.md b/checkpoints/checkpoint-265/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-265/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-265/adapter_config.json b/checkpoints/checkpoint-265/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-265/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-265/adapter_model.safetensors b/checkpoints/checkpoint-265/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4540bf3108b781c993f3f6b9824dd1b3ae0c8eb --- /dev/null +++ b/checkpoints/checkpoint-265/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581748d6578c28fa9e63cae7e9d8d705591167a5173aa747ae68aa0901a8ce46 +size 74016 diff --git a/checkpoints/checkpoint-265/chat_template.jinja b/checkpoints/checkpoint-265/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-265/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-265/optimizer.pt b/checkpoints/checkpoint-265/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..206f040f0daf93934e4ac8220fdf307e633893ed --- /dev/null +++ b/checkpoints/checkpoint-265/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f7c71edd4e6d9469e9f2bc83aee27a13f4f97c5557a1a083fdd60b09dd0349 +size 43813 diff --git a/checkpoints/checkpoint-265/rng_state.pth b/checkpoints/checkpoint-265/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-265/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-265/scheduler.pt b/checkpoints/checkpoint-265/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..579cdd12221ef0a1ce41601dcbf2977e9bc7fdc8 --- /dev/null +++ b/checkpoints/checkpoint-265/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f2d314d20016bd4e4a56134ba2b7f1f667938f1c700d4706dafa75996ab96e +size 1465 diff --git a/checkpoints/checkpoint-265/special_tokens_map.json b/checkpoints/checkpoint-265/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-265/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-265/tokenizer.json b/checkpoints/checkpoint-265/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-265/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-265/tokenizer_config.json b/checkpoints/checkpoint-265/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-265/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-265/trainer_state.json b/checkpoints/checkpoint-265/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f2ea9fca7b780847d4a2024170071917eedd0b4 --- /dev/null +++ b/checkpoints/checkpoint-265/trainer_state.json @@ -0,0 +1,1905 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6683480453972257, + "eval_steps": 100, + "global_step": 265, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.984250565988352e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-265/training_args.bin b/checkpoints/checkpoint-265/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-265/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-270/README.md b/checkpoints/checkpoint-270/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-270/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-270/adapter_config.json b/checkpoints/checkpoint-270/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-270/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-270/adapter_model.safetensors b/checkpoints/checkpoint-270/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..467b44d2c13d2334a3ad88ca51c621d9d29d3637 --- /dev/null +++ b/checkpoints/checkpoint-270/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a839dafca79d6206a8670f870f2af87aec9fdc3204cb1930a4d95f696b290135 +size 74016 diff --git a/checkpoints/checkpoint-270/chat_template.jinja b/checkpoints/checkpoint-270/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-270/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-270/optimizer.pt b/checkpoints/checkpoint-270/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a46f45377d7dd3bcdeb68d11151a1dccc949491 --- /dev/null +++ b/checkpoints/checkpoint-270/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27a54fc1ed294721b17f015d1f7c50f9d84ba5cb2ed791d23bc43a8db56ff76 +size 43813 diff --git a/checkpoints/checkpoint-270/rng_state.pth b/checkpoints/checkpoint-270/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-270/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-270/scheduler.pt b/checkpoints/checkpoint-270/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5497593929bd6d243a69caf38b9c42a0f945eac --- /dev/null +++ b/checkpoints/checkpoint-270/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d379d79076dfe0d603f997647471f73ed75425774216fdf5f3f9e9ce71ee87 +size 1465 diff --git a/checkpoints/checkpoint-270/special_tokens_map.json b/checkpoints/checkpoint-270/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-270/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-270/tokenizer.json b/checkpoints/checkpoint-270/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-270/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-270/tokenizer_config.json b/checkpoints/checkpoint-270/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-270/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-270/trainer_state.json b/checkpoints/checkpoint-270/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0950bf0254093164b7f69dca12b6dbb43c134a5 --- /dev/null +++ b/checkpoints/checkpoint-270/trainer_state.json @@ -0,0 +1,1940 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6809583858764187, + "eval_steps": 100, + "global_step": 270, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.0389345834213376e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-270/training_args.bin b/checkpoints/checkpoint-270/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-270/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-275/README.md b/checkpoints/checkpoint-275/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-275/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-275/adapter_config.json b/checkpoints/checkpoint-275/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-275/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-275/adapter_model.safetensors b/checkpoints/checkpoint-275/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27fd4deddc2af3729b7ec2832686a3635bdabc4b --- /dev/null +++ b/checkpoints/checkpoint-275/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3ca49f34b93627e4f47e7478dfe76d2b0c961a1abea15c24d72c12b259dc6d +size 74016 diff --git a/checkpoints/checkpoint-275/chat_template.jinja b/checkpoints/checkpoint-275/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-275/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-275/optimizer.pt b/checkpoints/checkpoint-275/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d75cf37a07f50652876dbef12a863e64210f9524 --- /dev/null +++ b/checkpoints/checkpoint-275/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c00edc31e591f699511ef4e66660d095cf7ca39cb96eb2ecc800c962392127 +size 43813 diff --git a/checkpoints/checkpoint-275/rng_state.pth b/checkpoints/checkpoint-275/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-275/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-275/scheduler.pt b/checkpoints/checkpoint-275/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..553197b699721c3e20fc8b6a12461fbceae31221 --- /dev/null +++ b/checkpoints/checkpoint-275/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd144dad190248d775359e4ef6d86c3848f7ee6406ffb6155c50088c96dd6ca +size 1465 diff --git a/checkpoints/checkpoint-275/special_tokens_map.json b/checkpoints/checkpoint-275/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-275/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-275/tokenizer.json b/checkpoints/checkpoint-275/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-275/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-275/tokenizer_config.json b/checkpoints/checkpoint-275/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-275/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-275/trainer_state.json b/checkpoints/checkpoint-275/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..15bd13a6eb004584d2846994e178033e3f313075 --- /dev/null +++ b/checkpoints/checkpoint-275/trainer_state.json @@ -0,0 +1,1975 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6935687263556116, + "eval_steps": 100, + "global_step": 275, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.0959061075124224e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-275/training_args.bin b/checkpoints/checkpoint-275/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-275/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-280/README.md b/checkpoints/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-280/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-280/adapter_config.json b/checkpoints/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-280/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-280/adapter_model.safetensors b/checkpoints/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..032bb8b2e3b65e419d3e58db7b7467bba8bcf5e1 --- /dev/null +++ b/checkpoints/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bbce80662d51077517f711e6f06ce9d0e7c25948c0a91b5f9360e4f281940e +size 74016 diff --git a/checkpoints/checkpoint-280/chat_template.jinja b/checkpoints/checkpoint-280/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-280/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-280/optimizer.pt b/checkpoints/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a16251bf2a9f9a437c4a6ff92421127146de4a0 --- /dev/null +++ b/checkpoints/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc8f6bc63815d08c15e1883fe7ce3b1a4dcd7f5261d09b82f5e1f771ebba955e +size 43813 diff --git a/checkpoints/checkpoint-280/rng_state.pth b/checkpoints/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-280/scheduler.pt b/checkpoints/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06dfbb522b23a6650608e1d782ac9ece3a792731 --- /dev/null +++ b/checkpoints/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d90c54f1c19f6ed09c6387c830af06f276122506bd071cad17b0d7098802f8a +size 1465 diff --git a/checkpoints/checkpoint-280/special_tokens_map.json b/checkpoints/checkpoint-280/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-280/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-280/tokenizer.json b/checkpoints/checkpoint-280/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-280/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-280/tokenizer_config.json b/checkpoints/checkpoint-280/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-280/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-280/trainer_state.json b/checkpoints/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1311050270c5b02f3eaa78a397282f598e626a8 --- /dev/null +++ b/checkpoints/checkpoint-280/trainer_state.json @@ -0,0 +1,2010 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7061790668348046, + "eval_steps": 100, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.151814931659981e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-280/training_args.bin b/checkpoints/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-285/README.md b/checkpoints/checkpoint-285/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-285/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-285/adapter_config.json b/checkpoints/checkpoint-285/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-285/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-285/adapter_model.safetensors b/checkpoints/checkpoint-285/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..730a811c505d90bb216a279bef8e5495030fd505 --- /dev/null +++ b/checkpoints/checkpoint-285/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40fff062ed7d9a4c54e924f4e32a979d7705f507b97f9c8884c10008367a3100 +size 74016 diff --git a/checkpoints/checkpoint-285/chat_template.jinja b/checkpoints/checkpoint-285/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-285/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-285/optimizer.pt b/checkpoints/checkpoint-285/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c117b3a345c51f7d20ee3fdd2ac9da39ee8d4471 --- /dev/null +++ b/checkpoints/checkpoint-285/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d5e45d5c0d1ccb7fea74ba9f1713be41d24e3609b0549f7174b1998a951638 +size 43813 diff --git a/checkpoints/checkpoint-285/rng_state.pth b/checkpoints/checkpoint-285/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-285/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-285/scheduler.pt b/checkpoints/checkpoint-285/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df09538d7d7d89febcd55bc7ec3967efb1ee4c57 --- /dev/null +++ b/checkpoints/checkpoint-285/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f66dee334756fd0e0e2cf55a8a6fc45799c020b1b02da8f78480bc436a19c5d +size 1465 diff --git a/checkpoints/checkpoint-285/special_tokens_map.json b/checkpoints/checkpoint-285/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-285/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-285/tokenizer.json b/checkpoints/checkpoint-285/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-285/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-285/tokenizer_config.json b/checkpoints/checkpoint-285/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-285/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-285/trainer_state.json b/checkpoints/checkpoint-285/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..feb610a92955c3fa17f1d9f58ddf617c01cbcae0 --- /dev/null +++ b/checkpoints/checkpoint-285/trainer_state.json @@ -0,0 +1,2045 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7187894073139974, + "eval_steps": 100, + "global_step": 285, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.208147034598605e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-285/training_args.bin b/checkpoints/checkpoint-285/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-285/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-290/README.md b/checkpoints/checkpoint-290/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-290/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-290/adapter_config.json b/checkpoints/checkpoint-290/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-290/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-290/adapter_model.safetensors b/checkpoints/checkpoint-290/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15e521d8408dd4c3b774c9808703ddd6373b4caa --- /dev/null +++ b/checkpoints/checkpoint-290/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a9fe334d1bda9503276a38f5fa440a80069ceb6985b311cc21e8435c397b9c +size 74016 diff --git a/checkpoints/checkpoint-290/chat_template.jinja b/checkpoints/checkpoint-290/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-290/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-290/optimizer.pt b/checkpoints/checkpoint-290/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d186aae1c5d3cbcb7d0f6f9c495046b0ee81904 --- /dev/null +++ b/checkpoints/checkpoint-290/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cca22b28ae82b14d77c9886778a4c64fed513f75f8429e22937c4d99088f92d +size 43813 diff --git a/checkpoints/checkpoint-290/rng_state.pth b/checkpoints/checkpoint-290/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-290/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-290/scheduler.pt b/checkpoints/checkpoint-290/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f265a846e0ff405a28bff666e3bf234497d6dd48 --- /dev/null +++ b/checkpoints/checkpoint-290/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a909198e907fc5706177ad04bb3bfa534cb12f46872764cf62e38366c40aed57 +size 1465 diff --git a/checkpoints/checkpoint-290/special_tokens_map.json b/checkpoints/checkpoint-290/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-290/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-290/tokenizer.json b/checkpoints/checkpoint-290/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-290/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-290/tokenizer_config.json b/checkpoints/checkpoint-290/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-290/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-290/trainer_state.json b/checkpoints/checkpoint-290/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c850b6be38d6be8d61c3a5402a0c2c16cacdc059 --- /dev/null +++ b/checkpoints/checkpoint-290/trainer_state.json @@ -0,0 +1,2080 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7313997477931904, + "eval_steps": 100, + "global_step": 290, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.265821021364224e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-290/training_args.bin b/checkpoints/checkpoint-290/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-290/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-295/README.md b/checkpoints/checkpoint-295/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-295/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-295/adapter_config.json b/checkpoints/checkpoint-295/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-295/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-295/adapter_model.safetensors b/checkpoints/checkpoint-295/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51705ce3567414cd0d051465d1f0ac389949d0c1 --- /dev/null +++ b/checkpoints/checkpoint-295/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1527a226737ec95167ae2b0d4483d397deffe74bf1a606ca6c00f0e57e910ad +size 74016 diff --git a/checkpoints/checkpoint-295/chat_template.jinja b/checkpoints/checkpoint-295/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-295/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-295/optimizer.pt b/checkpoints/checkpoint-295/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0a7be4a501323845bda7c5f4c05d655f494c92d --- /dev/null +++ b/checkpoints/checkpoint-295/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8410c7719d3e0ce5e42afb21a38817c9bb17bdbe0c72f833ba76b0edbffd500 +size 43813 diff --git a/checkpoints/checkpoint-295/rng_state.pth b/checkpoints/checkpoint-295/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..27efe79200462dc7ec248017016a4968efc39ca1 --- /dev/null +++ b/checkpoints/checkpoint-295/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4684d51fc75f779aa7923c633e4c75a716ac73280dc8e9335cb1439e364bde19 +size 14581 diff --git a/checkpoints/checkpoint-295/scheduler.pt b/checkpoints/checkpoint-295/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8410738938bdcb7301df317288cf0dee143e7212 --- /dev/null +++ b/checkpoints/checkpoint-295/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3ac4148a25f18382221038e6b8868da19a9fcbccba07e0afa6631cc834fe3dd +size 1465 diff --git a/checkpoints/checkpoint-295/special_tokens_map.json b/checkpoints/checkpoint-295/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-295/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-295/tokenizer.json b/checkpoints/checkpoint-295/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-295/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-295/tokenizer_config.json b/checkpoints/checkpoint-295/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-295/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-295/trainer_state.json b/checkpoints/checkpoint-295/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92fe0e0a59787a92712403bc09d224e0a61634cb --- /dev/null +++ b/checkpoints/checkpoint-295/trainer_state.json @@ -0,0 +1,2115 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7440100882723834, + "eval_steps": 100, + "global_step": 295, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.3207752167489536e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-295/training_args.bin b/checkpoints/checkpoint-295/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-295/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-30/README.md b/checkpoints/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-30/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-30/adapter_config.json b/checkpoints/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-30/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-30/adapter_model.safetensors b/checkpoints/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07b04ae58179ee2edd22122ac005038fe7c28b1b --- /dev/null +++ b/checkpoints/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6b0f0f8be18eb4996c9a07cef4ab9fd3bdf1454ed68987e49f03774a53dcc1 +size 74016 diff --git a/checkpoints/checkpoint-30/chat_template.jinja b/checkpoints/checkpoint-30/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-30/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-30/optimizer.pt b/checkpoints/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a434e7e103251f850160218118d0c1ced0922977 --- /dev/null +++ b/checkpoints/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0843b572d220ea63c1059f7110ddbced0ad916f08b066df055b34abc6c5b984 +size 43813 diff --git a/checkpoints/checkpoint-30/rng_state.pth b/checkpoints/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-30/scheduler.pt b/checkpoints/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f73c8d6159216f64da7c65bb2f34688265f81cd --- /dev/null +++ b/checkpoints/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79bc3c8496017652eacbb97371112ed9f204687c83413137ac0ca43459c97aa1 +size 1465 diff --git a/checkpoints/checkpoint-30/special_tokens_map.json b/checkpoints/checkpoint-30/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-30/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-30/tokenizer.json b/checkpoints/checkpoint-30/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-30/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-30/tokenizer_config.json b/checkpoints/checkpoint-30/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-30/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-30/trainer_state.json b/checkpoints/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..67526dfd2ac8dc879677622e1001eb178e42c8a6 --- /dev/null +++ b/checkpoints/checkpoint-30/trainer_state.json @@ -0,0 +1,244 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.07566204287515763, + "eval_steps": 100, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3361464016281600.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-30/training_args.bin b/checkpoints/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-300/README.md b/checkpoints/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-300/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-300/adapter_config.json b/checkpoints/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-300/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-300/adapter_model.safetensors b/checkpoints/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a69b88c26d475b7cbd0363073e7cff5ddc836b3 --- /dev/null +++ b/checkpoints/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6093e8d71665148dc0834b6cc4f1cb1a9d558d99cb8625521c97c0e29da401c +size 74016 diff --git a/checkpoints/checkpoint-300/chat_template.jinja b/checkpoints/checkpoint-300/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-300/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-300/optimizer.pt b/checkpoints/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e15b47cbb31b0b19356f63d553df5c9e3a780b4b --- /dev/null +++ b/checkpoints/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6484075c6967752f67d342250eb2045b4eed7f71d5f9f7facb00ff3616ca5589 +size 43813 diff --git a/checkpoints/checkpoint-300/rng_state.pth b/checkpoints/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-300/scheduler.pt b/checkpoints/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb76a48f8ddea3bec9280e93c444bb1d071ddfc5 --- /dev/null +++ b/checkpoints/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794e9a3813693a751baa493bd4c469b6123b00fae4ecae572ea47b8479d6a6ff +size 1465 diff --git a/checkpoints/checkpoint-300/special_tokens_map.json b/checkpoints/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-300/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-300/tokenizer.json b/checkpoints/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-300/tokenizer_config.json b/checkpoints/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-300/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-300/trainer_state.json b/checkpoints/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a0e0f2680abf91da36f4861a5cf5823cf5316e3f --- /dev/null +++ b/checkpoints/checkpoint-300/trainer_state.json @@ -0,0 +1,2158 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7566204287515763, + "eval_steps": 100, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.377440539161395e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-300/training_args.bin b/checkpoints/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-305/README.md b/checkpoints/checkpoint-305/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-305/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-305/adapter_config.json b/checkpoints/checkpoint-305/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-305/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-305/adapter_model.safetensors b/checkpoints/checkpoint-305/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97b3dcc4b2245a8fb702a82e5a27d39eac5fcfa6 --- /dev/null +++ b/checkpoints/checkpoint-305/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbee762dea73de62474626b9832d11bc0a2d57b9b070da8298ed545c1c7cd7c4 +size 74016 diff --git a/checkpoints/checkpoint-305/chat_template.jinja b/checkpoints/checkpoint-305/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-305/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-305/optimizer.pt b/checkpoints/checkpoint-305/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb64c00338a004ad34c06e9aaa0d3b622dc5ca81 --- /dev/null +++ b/checkpoints/checkpoint-305/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef2367bd82a5c348f4cf94d15e5992aba7bb4411df55e1d02bc5b01277625ee +size 43813 diff --git a/checkpoints/checkpoint-305/rng_state.pth b/checkpoints/checkpoint-305/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-305/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-305/scheduler.pt b/checkpoints/checkpoint-305/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..924ffc30aae3c22b0ee0e6acdd963411c54df19b --- /dev/null +++ b/checkpoints/checkpoint-305/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9968280808143398ed6a1b7d89b59a4275f934962532400a775f25610da7ebbd +size 1465 diff --git a/checkpoints/checkpoint-305/special_tokens_map.json b/checkpoints/checkpoint-305/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-305/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-305/tokenizer.json b/checkpoints/checkpoint-305/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-305/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-305/tokenizer_config.json b/checkpoints/checkpoint-305/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-305/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-305/trainer_state.json b/checkpoints/checkpoint-305/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac89293e414efe1f5230b7996f61743db672c81e --- /dev/null +++ b/checkpoints/checkpoint-305/trainer_state.json @@ -0,0 +1,2193 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7692307692307693, + "eval_steps": 100, + "global_step": 305, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.432241633706803e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-305/training_args.bin b/checkpoints/checkpoint-305/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-305/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-310/README.md b/checkpoints/checkpoint-310/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-310/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-310/adapter_config.json b/checkpoints/checkpoint-310/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-310/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-310/adapter_model.safetensors b/checkpoints/checkpoint-310/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b50074bcc1b5114f04c6ac1bcf6842bfb07d2089 --- /dev/null +++ b/checkpoints/checkpoint-310/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11b1fa82e0accb9711a9618fbeeae75cf17f554c0c2423321984ba65fcef9bf +size 74016 diff --git a/checkpoints/checkpoint-310/chat_template.jinja b/checkpoints/checkpoint-310/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-310/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-310/optimizer.pt b/checkpoints/checkpoint-310/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b1fcb2b5d4465bf5cd44d384849ae6c372aca8c --- /dev/null +++ b/checkpoints/checkpoint-310/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093a741c4023a1ef2d52c3474e38268561e1ed93c667408fb8dcc815d609ea8d +size 43813 diff --git a/checkpoints/checkpoint-310/rng_state.pth b/checkpoints/checkpoint-310/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-310/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-310/scheduler.pt b/checkpoints/checkpoint-310/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..593ded785741662a028f56717508348ff3096e44 --- /dev/null +++ b/checkpoints/checkpoint-310/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:587894a39cc3c150059fb29c66e53bf8aa6882d85638d646e4af818a3032f3d4 +size 1465 diff --git a/checkpoints/checkpoint-310/special_tokens_map.json b/checkpoints/checkpoint-310/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-310/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-310/tokenizer.json b/checkpoints/checkpoint-310/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-310/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-310/tokenizer_config.json b/checkpoints/checkpoint-310/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-310/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-310/trainer_state.json b/checkpoints/checkpoint-310/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6fcc371e94c1d815eb03debe8bb82ae7d1de168c --- /dev/null +++ b/checkpoints/checkpoint-310/trainer_state.json @@ -0,0 +1,2228 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7818411097099621, + "eval_steps": 100, + "global_step": 310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.489267193388237e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-310/training_args.bin b/checkpoints/checkpoint-310/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-310/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-315/README.md b/checkpoints/checkpoint-315/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-315/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-315/adapter_config.json b/checkpoints/checkpoint-315/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-315/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-315/adapter_model.safetensors b/checkpoints/checkpoint-315/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63fc8740dff1d4cc2a0a6d968e722ad873f31905 --- /dev/null +++ b/checkpoints/checkpoint-315/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1285b5c1a835aed6e507fa8954d06933ede1b6e509998a86a9b67211af6d02 +size 74016 diff --git a/checkpoints/checkpoint-315/chat_template.jinja b/checkpoints/checkpoint-315/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-315/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-315/optimizer.pt b/checkpoints/checkpoint-315/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9049d5c462ffc58e9235419cd188928adc89ad --- /dev/null +++ b/checkpoints/checkpoint-315/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf90f80c57d8d7aef415dc8f3a3bb6f8f342e626dc43eb0b87688cace5d6bd1 +size 43813 diff --git a/checkpoints/checkpoint-315/rng_state.pth b/checkpoints/checkpoint-315/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-315/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-315/scheduler.pt b/checkpoints/checkpoint-315/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..531b8b5ed9a5636cecd474eeedf10701830b54af --- /dev/null +++ b/checkpoints/checkpoint-315/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63262811432ed34597e3d79c4394a5c585f33c9cf9001379f893ff7b56337117 +size 1465 diff --git a/checkpoints/checkpoint-315/special_tokens_map.json b/checkpoints/checkpoint-315/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-315/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-315/tokenizer.json b/checkpoints/checkpoint-315/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-315/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-315/tokenizer_config.json b/checkpoints/checkpoint-315/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-315/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-315/trainer_state.json b/checkpoints/checkpoint-315/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf889e5724648a8f94206ffbf51468e3d5526870 --- /dev/null +++ b/checkpoints/checkpoint-315/trainer_state.json @@ -0,0 +1,2263 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7944514501891551, + "eval_steps": 100, + "global_step": 315, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.546526907294515e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-315/training_args.bin b/checkpoints/checkpoint-315/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-315/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-320/README.md b/checkpoints/checkpoint-320/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-320/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-320/adapter_config.json b/checkpoints/checkpoint-320/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-320/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-320/adapter_model.safetensors b/checkpoints/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b829f33677bea40850cd113ed4fd88bfe4a35968 --- /dev/null +++ b/checkpoints/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3435de9cb4656511f640f228520403cebf0cbc9b861160f349b2d237d7bfe3df +size 74016 diff --git a/checkpoints/checkpoint-320/chat_template.jinja b/checkpoints/checkpoint-320/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-320/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-320/optimizer.pt b/checkpoints/checkpoint-320/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cc322f8639b2309043b8bd0e10a26a2fbbd89dd --- /dev/null +++ b/checkpoints/checkpoint-320/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68942560406af12c4cf7b4f2ed83fdc1b64447103874fa29f59fab08bafc99d +size 43813 diff --git a/checkpoints/checkpoint-320/rng_state.pth b/checkpoints/checkpoint-320/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-320/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-320/scheduler.pt b/checkpoints/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4f5b30908ceab72809ac4e360212bb7955afe06 --- /dev/null +++ b/checkpoints/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebddd736ccd70376fdd6eb3d0feea2b93a82052a161e00dbb6eb67aa0e6682c5 +size 1465 diff --git a/checkpoints/checkpoint-320/special_tokens_map.json b/checkpoints/checkpoint-320/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-320/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-320/tokenizer.json b/checkpoints/checkpoint-320/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-320/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-320/tokenizer_config.json b/checkpoints/checkpoint-320/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-320/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-320/trainer_state.json b/checkpoints/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9a6f05d50a3957e2d8b9df0c7a217f45ec33663 --- /dev/null +++ b/checkpoints/checkpoint-320/trainer_state.json @@ -0,0 +1,2298 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.807061790668348, + "eval_steps": 100, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.6014991145426944e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-320/training_args.bin b/checkpoints/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-325/README.md b/checkpoints/checkpoint-325/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-325/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-325/adapter_config.json b/checkpoints/checkpoint-325/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-325/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-325/adapter_model.safetensors b/checkpoints/checkpoint-325/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3171df6942ce3eb0d03143d01c5ae508c9f217e8 --- /dev/null +++ b/checkpoints/checkpoint-325/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d755c82c0148f900e9c9c7212ba2c82b652fbaf8f9c3111872f91d258d9f40bd +size 74016 diff --git a/checkpoints/checkpoint-325/chat_template.jinja b/checkpoints/checkpoint-325/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-325/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-325/optimizer.pt b/checkpoints/checkpoint-325/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f3de19d4bf42d1f6174ac5729b47b9600acb583 --- /dev/null +++ b/checkpoints/checkpoint-325/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff894e39af9e8d3249362fa43bf68ca9c32c04022da00b9e413c60677d2e697 +size 43813 diff --git a/checkpoints/checkpoint-325/rng_state.pth b/checkpoints/checkpoint-325/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-325/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-325/scheduler.pt b/checkpoints/checkpoint-325/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6a4ce088ef3390c34f67016cf1301ca6de770ca --- /dev/null +++ b/checkpoints/checkpoint-325/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71ceab76addee7677a67458d13e464cacaf280f08dd6d8efdb7bc440141a991 +size 1465 diff --git a/checkpoints/checkpoint-325/special_tokens_map.json b/checkpoints/checkpoint-325/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-325/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-325/tokenizer.json b/checkpoints/checkpoint-325/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-325/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-325/tokenizer_config.json b/checkpoints/checkpoint-325/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-325/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-325/trainer_state.json b/checkpoints/checkpoint-325/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e00f044af5d5d8b2a07369c59fbf68002da2c3 --- /dev/null +++ b/checkpoints/checkpoint-325/trainer_state.json @@ -0,0 +1,2333 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.819672131147541, + "eval_steps": 100, + "global_step": 325, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.658308531862733e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-325/training_args.bin b/checkpoints/checkpoint-325/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-325/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-330/README.md b/checkpoints/checkpoint-330/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-330/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-330/adapter_config.json b/checkpoints/checkpoint-330/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-330/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-330/adapter_model.safetensors b/checkpoints/checkpoint-330/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1e12d02ed12c56eed9c1ffb72b637b354a31170 --- /dev/null +++ b/checkpoints/checkpoint-330/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a33e4f29e4844e610a74589cae11d27358c2701873018178986ff479323960 +size 74016 diff --git a/checkpoints/checkpoint-330/chat_template.jinja b/checkpoints/checkpoint-330/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-330/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-330/optimizer.pt b/checkpoints/checkpoint-330/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bbf770ff42a4e86f61f4832eb7e07ec1a35455d --- /dev/null +++ b/checkpoints/checkpoint-330/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a03067ea1c8506bad6b77099fe1a4ffa05868d509b3e15cb037e6d661477ad0 +size 43813 diff --git a/checkpoints/checkpoint-330/rng_state.pth b/checkpoints/checkpoint-330/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-330/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-330/scheduler.pt b/checkpoints/checkpoint-330/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7a1604493ce6e99ffb1acb83c2248df4cbcd4a0 --- /dev/null +++ b/checkpoints/checkpoint-330/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5be53248e4bd911cc8e7dd283e81be626be8a5c09afd82a1589487aa97e6bc +size 1465 diff --git a/checkpoints/checkpoint-330/special_tokens_map.json b/checkpoints/checkpoint-330/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-330/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-330/tokenizer.json b/checkpoints/checkpoint-330/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-330/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-330/tokenizer_config.json b/checkpoints/checkpoint-330/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-330/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-330/trainer_state.json b/checkpoints/checkpoint-330/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f5f43893788c412346d50ad44ae762322e49ac7 --- /dev/null +++ b/checkpoints/checkpoint-330/trainer_state.json @@ -0,0 +1,2368 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.832282471626734, + "eval_steps": 100, + "global_step": 330, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.714127296693043e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-330/training_args.bin b/checkpoints/checkpoint-330/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-330/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-335/README.md b/checkpoints/checkpoint-335/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-335/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-335/adapter_config.json b/checkpoints/checkpoint-335/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-335/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-335/adapter_model.safetensors b/checkpoints/checkpoint-335/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd37295a621eb6eac28383df3af8604992e4f6a4 --- /dev/null +++ b/checkpoints/checkpoint-335/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c8d8e5c139c21554a99efb0db3567add2077917a5139b422b8b6f1cff86684 +size 74016 diff --git a/checkpoints/checkpoint-335/chat_template.jinja b/checkpoints/checkpoint-335/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-335/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-335/optimizer.pt b/checkpoints/checkpoint-335/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bbcc278efe3c4c909133bb69741a292ab496166 --- /dev/null +++ b/checkpoints/checkpoint-335/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cc9e0e6e45147f51fadd82d64f3d40874a0f4981805806f4a84a7c24957b5a +size 43813 diff --git a/checkpoints/checkpoint-335/rng_state.pth b/checkpoints/checkpoint-335/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-335/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-335/scheduler.pt b/checkpoints/checkpoint-335/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e215c109529a33edf8e4643983224e1531134932 --- /dev/null +++ b/checkpoints/checkpoint-335/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81bd6baf3e7ed005f843b4ab054c97f7d21c14602128f079f0afa90a27eafaa +size 1465 diff --git a/checkpoints/checkpoint-335/special_tokens_map.json b/checkpoints/checkpoint-335/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-335/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-335/tokenizer.json b/checkpoints/checkpoint-335/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-335/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-335/tokenizer_config.json b/checkpoints/checkpoint-335/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-335/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-335/trainer_state.json b/checkpoints/checkpoint-335/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..db83a2ef7eca210008a80ad4e5f328a78d6525f1 --- /dev/null +++ b/checkpoints/checkpoint-335/trainer_state.json @@ -0,0 +1,2403 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8448928121059268, + "eval_steps": 100, + "global_step": 335, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.772179532591104e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-335/training_args.bin b/checkpoints/checkpoint-335/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-335/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-340/README.md b/checkpoints/checkpoint-340/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-340/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-340/adapter_config.json b/checkpoints/checkpoint-340/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-340/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-340/adapter_model.safetensors b/checkpoints/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..979371a8b36e03fad66cae07f98eed5e7119e1ff --- /dev/null +++ b/checkpoints/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a1b82076989d68a8bb5434b162c33ab513dc40062c84acbba0ecf4d4d7884d +size 74016 diff --git a/checkpoints/checkpoint-340/chat_template.jinja b/checkpoints/checkpoint-340/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-340/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-340/optimizer.pt b/checkpoints/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d79165c7c21aad920bbac61f61110979602364e2 --- /dev/null +++ b/checkpoints/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e161aa759d8bf2ae07df9a1d6f2cc45f84c66b3abd24667df388c60d8f97e9a +size 43813 diff --git a/checkpoints/checkpoint-340/rng_state.pth b/checkpoints/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-340/scheduler.pt b/checkpoints/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..df54d867c897406ffa4a056858514fed64c40f4b --- /dev/null +++ b/checkpoints/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900070de68007cc3c6ee56c9d31e466e40c13bf21dd4b01749ac67187add5810 +size 1465 diff --git a/checkpoints/checkpoint-340/special_tokens_map.json b/checkpoints/checkpoint-340/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-340/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-340/tokenizer.json b/checkpoints/checkpoint-340/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-340/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-340/tokenizer_config.json b/checkpoints/checkpoint-340/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-340/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-340/trainer_state.json b/checkpoints/checkpoint-340/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6cfe7d12be8bdbcdf9148f3644bc59f8fb8c2c54 --- /dev/null +++ b/checkpoints/checkpoint-340/trainer_state.json @@ -0,0 +1,2438 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8575031525851198, + "eval_steps": 100, + "global_step": 340, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.82862871264215e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-340/training_args.bin b/checkpoints/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-345/README.md b/checkpoints/checkpoint-345/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-345/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-345/adapter_config.json b/checkpoints/checkpoint-345/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-345/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-345/adapter_model.safetensors b/checkpoints/checkpoint-345/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..538b5dc3acd166fc4ee223f9eb56708a915a3303 --- /dev/null +++ b/checkpoints/checkpoint-345/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f09a1a3cf3214632896d02a7950b0d0af3c3856c2e5308a985c9c84fb399ce0 +size 74016 diff --git a/checkpoints/checkpoint-345/chat_template.jinja b/checkpoints/checkpoint-345/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-345/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-345/optimizer.pt b/checkpoints/checkpoint-345/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be42477ccc09ad7234699e4a9dbe9aab45a6e2d3 --- /dev/null +++ b/checkpoints/checkpoint-345/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c864f9c089edc15829f6847488b488fc65cfb4663207b60213c0ef3012e52913 +size 43813 diff --git a/checkpoints/checkpoint-345/rng_state.pth b/checkpoints/checkpoint-345/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-345/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-345/scheduler.pt b/checkpoints/checkpoint-345/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..66399066aa94804a063afa1bd7292172abf8fca1 --- /dev/null +++ b/checkpoints/checkpoint-345/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723fcd0755e44b5572ddcb02855462164595b5c3642681a00fb1792a3ac359e7 +size 1465 diff --git a/checkpoints/checkpoint-345/special_tokens_map.json b/checkpoints/checkpoint-345/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-345/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-345/tokenizer.json b/checkpoints/checkpoint-345/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-345/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-345/tokenizer_config.json b/checkpoints/checkpoint-345/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-345/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-345/trainer_state.json b/checkpoints/checkpoint-345/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e76ddc6c3204c8faae2d498bec474bf44d432477 --- /dev/null +++ b/checkpoints/checkpoint-345/trainer_state.json @@ -0,0 +1,2473 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8701134930643127, + "eval_steps": 100, + "global_step": 345, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.885465147757363e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-345/training_args.bin b/checkpoints/checkpoint-345/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-345/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-35/README.md b/checkpoints/checkpoint-35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-35/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-35/adapter_config.json b/checkpoints/checkpoint-35/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-35/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-35/adapter_model.safetensors b/checkpoints/checkpoint-35/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d12ecf31f56494fdab5c8d55a8ff485c11b5769c --- /dev/null +++ b/checkpoints/checkpoint-35/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425d2aaf1a18193705bdb1b45971ea7c7b04f82197ee0acbc19bf5598b18d7a4 +size 74016 diff --git a/checkpoints/checkpoint-35/chat_template.jinja b/checkpoints/checkpoint-35/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-35/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-35/optimizer.pt b/checkpoints/checkpoint-35/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0e17a2ead7752e383d1be6b066f375c8027670d --- /dev/null +++ b/checkpoints/checkpoint-35/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f79a924ec0f77f6af8de7741ae452be86333089431dca8eaaa3c540cfec6334 +size 43813 diff --git a/checkpoints/checkpoint-35/rng_state.pth b/checkpoints/checkpoint-35/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-35/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-35/scheduler.pt b/checkpoints/checkpoint-35/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4db5b36d075c3ec66e75171ff9dbb9bd6b4390c6 --- /dev/null +++ b/checkpoints/checkpoint-35/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137177371c25ac762aaa3fb10253db2d7fec8fe2c0f9111d1947f3ed1b2ddc55 +size 1465 diff --git a/checkpoints/checkpoint-35/special_tokens_map.json b/checkpoints/checkpoint-35/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-35/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-35/tokenizer.json b/checkpoints/checkpoint-35/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-35/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-35/tokenizer_config.json b/checkpoints/checkpoint-35/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-35/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-35/trainer_state.json b/checkpoints/checkpoint-35/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..55b20da355e4ac407d40e5e144ffba29bb8d23e1 --- /dev/null +++ b/checkpoints/checkpoint-35/trainer_state.json @@ -0,0 +1,279 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.08827238335435057, + "eval_steps": 100, + "global_step": 35, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3912897215791104.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-35/training_args.bin b/checkpoints/checkpoint-35/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-35/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-350/README.md b/checkpoints/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-350/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-350/adapter_config.json b/checkpoints/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-350/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-350/adapter_model.safetensors b/checkpoints/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ce2a9dcd68b300f303217639ac89ba0ae00adb1 --- /dev/null +++ b/checkpoints/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e529b4fd9a1b152d48004b548761e7522055fcc017c6283b61738d04e3c8eee +size 74016 diff --git a/checkpoints/checkpoint-350/chat_template.jinja b/checkpoints/checkpoint-350/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-350/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-350/optimizer.pt b/checkpoints/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72b83ce50fac165eb5c539ed5afd3cf9baeb35f9 --- /dev/null +++ b/checkpoints/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d135566278892a1363edd344790956358aa8c5f7808259c99be65b74a7199cc +size 43813 diff --git a/checkpoints/checkpoint-350/rng_state.pth b/checkpoints/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-350/scheduler.pt b/checkpoints/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b01ec7f3cd2d0330399e869d791eccfd68bdcca --- /dev/null +++ b/checkpoints/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b4b9d7b95f36b63f7d9de4c3a62923517aba0add71d5be226316ad459e6d2c3 +size 1465 diff --git a/checkpoints/checkpoint-350/special_tokens_map.json b/checkpoints/checkpoint-350/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-350/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-350/tokenizer.json b/checkpoints/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-350/tokenizer_config.json b/checkpoints/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-350/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-350/trainer_state.json b/checkpoints/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4fd7b3e6d812be3b21d4f057dbdaf2fb641ac93a --- /dev/null +++ b/checkpoints/checkpoint-350/trainer_state.json @@ -0,0 +1,2508 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8827238335435057, + "eval_steps": 100, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.941707191378739e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-350/training_args.bin b/checkpoints/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-355/README.md b/checkpoints/checkpoint-355/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-355/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-355/adapter_config.json b/checkpoints/checkpoint-355/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-355/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-355/adapter_model.safetensors b/checkpoints/checkpoint-355/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b585bca3fd163157fcf67f5ba82dce88e6b0280f --- /dev/null +++ b/checkpoints/checkpoint-355/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22dc47ada54e92f3a4e5e94ea96a503d1a54dccd5b8ceed265697c2c71e38115 +size 74016 diff --git a/checkpoints/checkpoint-355/chat_template.jinja b/checkpoints/checkpoint-355/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-355/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-355/optimizer.pt b/checkpoints/checkpoint-355/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd095aabbb121154b4f17b1b1cf4a096852ee179 --- /dev/null +++ b/checkpoints/checkpoint-355/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695f52a4d752a817fca7f2bd7534da8762ec4fd9d428802a52177dafa58e9c9e +size 43813 diff --git a/checkpoints/checkpoint-355/rng_state.pth b/checkpoints/checkpoint-355/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-355/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-355/scheduler.pt b/checkpoints/checkpoint-355/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8517453278a2dedc786de8d522fcb0b5623db18 --- /dev/null +++ b/checkpoints/checkpoint-355/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d887b6db763dcaa1472a04beee7e65fb024c95f6e3e32f7654105e6a2c5b536 +size 1465 diff --git a/checkpoints/checkpoint-355/special_tokens_map.json b/checkpoints/checkpoint-355/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-355/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-355/tokenizer.json b/checkpoints/checkpoint-355/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-355/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-355/tokenizer_config.json b/checkpoints/checkpoint-355/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-355/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-355/trainer_state.json b/checkpoints/checkpoint-355/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbb841efd1f5e8a1880de841d42fff91b311c008 --- /dev/null +++ b/checkpoints/checkpoint-355/trainer_state.json @@ -0,0 +1,2543 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8953341740226987, + "eval_steps": 100, + "global_step": 355, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.999615332369203e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-355/training_args.bin b/checkpoints/checkpoint-355/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-355/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-360/README.md b/checkpoints/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-360/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-360/adapter_config.json b/checkpoints/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-360/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-360/adapter_model.safetensors b/checkpoints/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85b36c377d92a204be4154798ebe955706cb0743 --- /dev/null +++ b/checkpoints/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50057cd246991242e559060386871fa8fe639585596c9f8a143bb20670badddb +size 74016 diff --git a/checkpoints/checkpoint-360/chat_template.jinja b/checkpoints/checkpoint-360/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-360/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-360/optimizer.pt b/checkpoints/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95495d09ba3d1d66847abbeedca9d3507017dc6 --- /dev/null +++ b/checkpoints/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcb39415a55cae99f436662be671ab46da99d473676bf291d9d40bd3341e1abc +size 43813 diff --git a/checkpoints/checkpoint-360/rng_state.pth b/checkpoints/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-360/scheduler.pt b/checkpoints/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7337b5f20ad00fff725af21861c92cb7ad98990 --- /dev/null +++ b/checkpoints/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6a4e5ddd70e1348d075c7001b166e01fc5e82711e4e01a5558841be28d7c62 +size 1465 diff --git a/checkpoints/checkpoint-360/special_tokens_map.json b/checkpoints/checkpoint-360/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-360/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-360/tokenizer.json b/checkpoints/checkpoint-360/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-360/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-360/tokenizer_config.json b/checkpoints/checkpoint-360/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-360/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-360/trainer_state.json b/checkpoints/checkpoint-360/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9545cb59236bbeb5d0f40430a2413268defff893 --- /dev/null +++ b/checkpoints/checkpoint-360/trainer_state.json @@ -0,0 +1,2578 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9079445145018915, + "eval_steps": 100, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.056640892050637e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-360/training_args.bin b/checkpoints/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-365/README.md b/checkpoints/checkpoint-365/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-365/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-365/adapter_config.json b/checkpoints/checkpoint-365/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-365/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-365/adapter_model.safetensors b/checkpoints/checkpoint-365/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f60740a8d0a4658e6abdccaa386904b14ab35ec --- /dev/null +++ b/checkpoints/checkpoint-365/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2e5a66adde542c43431f3a8731bc6882370e77d2293e784d90de74ae7e5f50 +size 74016 diff --git a/checkpoints/checkpoint-365/chat_template.jinja b/checkpoints/checkpoint-365/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-365/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-365/optimizer.pt b/checkpoints/checkpoint-365/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad26314aa848f430436efaaa676fc325f103c42a --- /dev/null +++ b/checkpoints/checkpoint-365/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23259f75a593a5b8ab2e37b4492ead6365def275cdb52ed484ab63cf21d4f5f +size 43813 diff --git a/checkpoints/checkpoint-365/rng_state.pth b/checkpoints/checkpoint-365/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-365/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-365/scheduler.pt b/checkpoints/checkpoint-365/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd007b56c8ab5b77ff59e0ce6a498e0933bc45f2 --- /dev/null +++ b/checkpoints/checkpoint-365/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aec900879e8a59b017c7b8b8e3db51ad1231c51278e3535c8dc4f2042b9ca0e7 +size 1465 diff --git a/checkpoints/checkpoint-365/special_tokens_map.json b/checkpoints/checkpoint-365/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-365/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-365/tokenizer.json b/checkpoints/checkpoint-365/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-365/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-365/tokenizer_config.json b/checkpoints/checkpoint-365/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-365/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-365/trainer_state.json b/checkpoints/checkpoint-365/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..144d90344fb3174200cc3cf5e5a843bee93f01f3 --- /dev/null +++ b/checkpoints/checkpoint-365/trainer_state.json @@ -0,0 +1,2613 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9205548549810845, + "eval_steps": 100, + "global_step": 365, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.113189137350656e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-365/training_args.bin b/checkpoints/checkpoint-365/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-365/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-370/README.md b/checkpoints/checkpoint-370/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-370/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-370/adapter_config.json b/checkpoints/checkpoint-370/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-370/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-370/adapter_model.safetensors b/checkpoints/checkpoint-370/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bb6e924e5da29674e359cc7d642981c5a56d883 --- /dev/null +++ b/checkpoints/checkpoint-370/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d912be6a89f4b34c25d4bbecb93d6881fd0c217a75ffb5323b7297a93bf110f1 +size 74016 diff --git a/checkpoints/checkpoint-370/chat_template.jinja b/checkpoints/checkpoint-370/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-370/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-370/optimizer.pt b/checkpoints/checkpoint-370/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf571cb6cd2110fca0d70ae42f37bea9877a9cff --- /dev/null +++ b/checkpoints/checkpoint-370/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992464a43f50d07cd59da15c54672b6bda32c00e038f43cb14b89d26440a1cf3 +size 43813 diff --git a/checkpoints/checkpoint-370/rng_state.pth b/checkpoints/checkpoint-370/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-370/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-370/scheduler.pt b/checkpoints/checkpoint-370/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03db78c3aba9ac99403d13298775163b1fd86f7c --- /dev/null +++ b/checkpoints/checkpoint-370/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdafef646b64e17c9b8995204033f230f5834543ec381bb979bef6a8b3204f47 +size 1465 diff --git a/checkpoints/checkpoint-370/special_tokens_map.json b/checkpoints/checkpoint-370/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-370/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-370/tokenizer.json b/checkpoints/checkpoint-370/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-370/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-370/tokenizer_config.json b/checkpoints/checkpoint-370/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-370/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-370/trainer_state.json b/checkpoints/checkpoint-370/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6077ca4a348896eb3f0071f495f9f0570d14c158 --- /dev/null +++ b/checkpoints/checkpoint-370/trainer_state.json @@ -0,0 +1,2648 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9331651954602774, + "eval_steps": 100, + "global_step": 370, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.16972837671895e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-370/training_args.bin b/checkpoints/checkpoint-370/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-370/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-375/README.md b/checkpoints/checkpoint-375/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-375/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-375/adapter_config.json b/checkpoints/checkpoint-375/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-375/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-375/adapter_model.safetensors b/checkpoints/checkpoint-375/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39db294adc270113f8d08bc16e4ed097f72c173a --- /dev/null +++ b/checkpoints/checkpoint-375/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b038bc1aca2400b1cee5525600575750d6aedd0ba7c213bb6a3fe67c14f9fe +size 74016 diff --git a/checkpoints/checkpoint-375/chat_template.jinja b/checkpoints/checkpoint-375/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-375/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-375/optimizer.pt b/checkpoints/checkpoint-375/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fbb1841393febca266ea2efb000e23d5573ff82 --- /dev/null +++ b/checkpoints/checkpoint-375/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0123e90659fecc02f72e978a20deed1226f1b7410e3bdeaf323fb90419f60f4 +size 43813 diff --git a/checkpoints/checkpoint-375/rng_state.pth b/checkpoints/checkpoint-375/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-375/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-375/scheduler.pt b/checkpoints/checkpoint-375/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..35a31646a789f93e40a7e69815ef8307d749d22f --- /dev/null +++ b/checkpoints/checkpoint-375/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4dbc5d2ce8a0a398e1598172f6780e130809a01174103decf4609d8e74ae71 +size 1465 diff --git a/checkpoints/checkpoint-375/special_tokens_map.json b/checkpoints/checkpoint-375/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-375/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-375/tokenizer.json b/checkpoints/checkpoint-375/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-375/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-375/tokenizer_config.json b/checkpoints/checkpoint-375/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-375/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-375/trainer_state.json b/checkpoints/checkpoint-375/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..88b1a8c491158d6182cf13e5356a33ab9cec3ce9 --- /dev/null +++ b/checkpoints/checkpoint-375/trainer_state.json @@ -0,0 +1,2683 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9457755359394704, + "eval_steps": 100, + "global_step": 375, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.225222928007168e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-375/training_args.bin b/checkpoints/checkpoint-375/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-375/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-380/README.md b/checkpoints/checkpoint-380/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-380/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-380/adapter_config.json b/checkpoints/checkpoint-380/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-380/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-380/adapter_model.safetensors b/checkpoints/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15621dbe39809a48f42bf7556b5ff9757c7fe54f --- /dev/null +++ b/checkpoints/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f1280ccc9aefbc7aeaf73d927d940e2f90f0689253eb69d8b2da07355e77eaa +size 74016 diff --git a/checkpoints/checkpoint-380/chat_template.jinja b/checkpoints/checkpoint-380/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-380/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-380/optimizer.pt b/checkpoints/checkpoint-380/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..da36e874ed2042a426c28792ce9b333f2f40ffb7 --- /dev/null +++ b/checkpoints/checkpoint-380/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eba17e62b6123bcc92c5766b15552b6aee8cffb729b278da486ad6575312fa6 +size 43813 diff --git a/checkpoints/checkpoint-380/rng_state.pth b/checkpoints/checkpoint-380/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-380/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-380/scheduler.pt b/checkpoints/checkpoint-380/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..15dc3ebbc8ec02935ea2deee2594f454bb68b3d5 --- /dev/null +++ b/checkpoints/checkpoint-380/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748551ef496b3d17bf00453efd63985252edf9aff7fd68bb47afcddf11dc4f53 +size 1465 diff --git a/checkpoints/checkpoint-380/special_tokens_map.json b/checkpoints/checkpoint-380/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-380/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-380/tokenizer.json b/checkpoints/checkpoint-380/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-380/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-380/tokenizer_config.json b/checkpoints/checkpoint-380/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-380/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-380/trainer_state.json b/checkpoints/checkpoint-380/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2fed0419f3d3c1335a5d076cc7ccf694fa930e25 --- /dev/null +++ b/checkpoints/checkpoint-380/trainer_state.json @@ -0,0 +1,2718 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9583858764186634, + "eval_steps": 100, + "global_step": 380, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 58.08714294433594, + "learning_rate": 1.122448979591837e-06, + "loss": 1.5748, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 58.478763580322266, + "learning_rate": 1.0714285714285714e-06, + "loss": 1.8731, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 50.9328498840332, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.4714, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 63.404449462890625, + "learning_rate": 9.69387755102041e-07, + "loss": 1.6991, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 95.09234619140625, + "learning_rate": 9.183673469387756e-07, + "loss": 1.6495, + "step": 380 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.282239481756877e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-380/training_args.bin b/checkpoints/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-385/README.md b/checkpoints/checkpoint-385/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-385/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-385/adapter_config.json b/checkpoints/checkpoint-385/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-385/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-385/adapter_model.safetensors b/checkpoints/checkpoint-385/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7230e5fcb33c612575e003a7ebd2cea4e3e83250 --- /dev/null +++ b/checkpoints/checkpoint-385/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76b7c09419dbdb0fbef4c11da2b2206f840f40716b06667a17833773fc99d99 +size 74016 diff --git a/checkpoints/checkpoint-385/chat_template.jinja b/checkpoints/checkpoint-385/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-385/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-385/optimizer.pt b/checkpoints/checkpoint-385/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5603934794f31d50ec4025aad7baebfee2dafcba --- /dev/null +++ b/checkpoints/checkpoint-385/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7928b9b2eed1cf18775e9c652e681fea04486ab1347610216352bc6d36c120c5 +size 43813 diff --git a/checkpoints/checkpoint-385/rng_state.pth b/checkpoints/checkpoint-385/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-385/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-385/scheduler.pt b/checkpoints/checkpoint-385/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..616abd0173eff1281a7ba007b86c15ac30bdbe28 --- /dev/null +++ b/checkpoints/checkpoint-385/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb3979508be9ced868286b07b38e1636d00a61c34df75bff86bc1e07ec675cd +size 1465 diff --git a/checkpoints/checkpoint-385/special_tokens_map.json b/checkpoints/checkpoint-385/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-385/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-385/tokenizer.json b/checkpoints/checkpoint-385/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-385/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-385/tokenizer_config.json b/checkpoints/checkpoint-385/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-385/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-385/trainer_state.json b/checkpoints/checkpoint-385/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7ede5b8fbfa301426c59ae09a5d2213decf3d72 --- /dev/null +++ b/checkpoints/checkpoint-385/trainer_state.json @@ -0,0 +1,2753 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9709962168978562, + "eval_steps": 100, + "global_step": 385, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 58.08714294433594, + "learning_rate": 1.122448979591837e-06, + "loss": 1.5748, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 58.478763580322266, + "learning_rate": 1.0714285714285714e-06, + "loss": 1.8731, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 50.9328498840332, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.4714, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 63.404449462890625, + "learning_rate": 9.69387755102041e-07, + "loss": 1.6991, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 95.09234619140625, + "learning_rate": 9.183673469387756e-07, + "loss": 1.6495, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 57.21084213256836, + "learning_rate": 8.673469387755103e-07, + "loss": 1.6225, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 47.711647033691406, + "learning_rate": 8.163265306122449e-07, + "loss": 1.56, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 42.815582275390625, + "learning_rate": 7.653061224489796e-07, + "loss": 1.8021, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 50.893619537353516, + "learning_rate": 7.142857142857143e-07, + "loss": 1.5707, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 68.56468963623047, + "learning_rate": 6.632653061224491e-07, + "loss": 1.6402, + "step": 385 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.338355442334106e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-385/training_args.bin b/checkpoints/checkpoint-385/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-385/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-390/README.md b/checkpoints/checkpoint-390/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-390/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-390/adapter_config.json b/checkpoints/checkpoint-390/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-390/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-390/adapter_model.safetensors b/checkpoints/checkpoint-390/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96daf77d27b84c446e739f13bb7e972beaa5f715 --- /dev/null +++ b/checkpoints/checkpoint-390/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec4c7b8f4f3316ab1d3693bd513ad6681428d8b03d5cfbbd9e560c2913feb3f +size 74016 diff --git a/checkpoints/checkpoint-390/chat_template.jinja b/checkpoints/checkpoint-390/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-390/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-390/optimizer.pt b/checkpoints/checkpoint-390/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b69cedfdfc5b512977d9eeb0859233cb53ef98ab --- /dev/null +++ b/checkpoints/checkpoint-390/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d7e6b8b78bb46b9250b7664955502882073e0afe0434fbc0a656a82fcafa719 +size 43813 diff --git a/checkpoints/checkpoint-390/rng_state.pth b/checkpoints/checkpoint-390/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-390/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-390/scheduler.pt b/checkpoints/checkpoint-390/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..21fb5bcb5b7b8c618a92ce8cc57485217a07ee0b --- /dev/null +++ b/checkpoints/checkpoint-390/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4503455eed959bc7133b6bc9a764e0c234ec4b1f2b0938e0a9631235cd9dbd61 +size 1465 diff --git a/checkpoints/checkpoint-390/special_tokens_map.json b/checkpoints/checkpoint-390/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-390/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-390/tokenizer.json b/checkpoints/checkpoint-390/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-390/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-390/tokenizer_config.json b/checkpoints/checkpoint-390/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-390/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-390/trainer_state.json b/checkpoints/checkpoint-390/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed9f4323671f228c85bed2f527995aac7f98a966 --- /dev/null +++ b/checkpoints/checkpoint-390/trainer_state.json @@ -0,0 +1,2788 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9836065573770492, + "eval_steps": 100, + "global_step": 390, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 58.08714294433594, + "learning_rate": 1.122448979591837e-06, + "loss": 1.5748, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 58.478763580322266, + "learning_rate": 1.0714285714285714e-06, + "loss": 1.8731, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 50.9328498840332, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.4714, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 63.404449462890625, + "learning_rate": 9.69387755102041e-07, + "loss": 1.6991, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 95.09234619140625, + "learning_rate": 9.183673469387756e-07, + "loss": 1.6495, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 57.21084213256836, + "learning_rate": 8.673469387755103e-07, + "loss": 1.6225, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 47.711647033691406, + "learning_rate": 8.163265306122449e-07, + "loss": 1.56, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 42.815582275390625, + "learning_rate": 7.653061224489796e-07, + "loss": 1.8021, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 50.893619537353516, + "learning_rate": 7.142857142857143e-07, + "loss": 1.5707, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 68.56468963623047, + "learning_rate": 6.632653061224491e-07, + "loss": 1.6402, + "step": 385 + }, + { + "epoch": 0.9735182849936949, + "grad_norm": 71.2961654663086, + "learning_rate": 6.122448979591837e-07, + "loss": 1.7148, + "step": 386 + }, + { + "epoch": 0.9760403530895334, + "grad_norm": 64.18102264404297, + "learning_rate": 5.612244897959184e-07, + "loss": 1.5629, + "step": 387 + }, + { + "epoch": 0.978562421185372, + "grad_norm": 58.4742431640625, + "learning_rate": 5.102040816326531e-07, + "loss": 1.5886, + "step": 388 + }, + { + "epoch": 0.9810844892812106, + "grad_norm": 45.84322738647461, + "learning_rate": 4.591836734693878e-07, + "loss": 1.6636, + "step": 389 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 57.07891845703125, + "learning_rate": 4.0816326530612243e-07, + "loss": 1.6842, + "step": 390 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.398199858645402e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-390/training_args.bin b/checkpoints/checkpoint-390/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-390/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-395/README.md b/checkpoints/checkpoint-395/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-395/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-395/adapter_config.json b/checkpoints/checkpoint-395/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-395/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-395/adapter_model.safetensors b/checkpoints/checkpoint-395/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac76801cd369e0fedd9a04d106617c0a8525b3c4 --- /dev/null +++ b/checkpoints/checkpoint-395/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900adc501e81c679fc4c20bdb357e30b39c7174aa7b03e9abbb30dae61fe433b +size 74016 diff --git a/checkpoints/checkpoint-395/chat_template.jinja b/checkpoints/checkpoint-395/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-395/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-395/optimizer.pt b/checkpoints/checkpoint-395/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7aed132f1fb7956db06a002fd37a232d4ba9d899 --- /dev/null +++ b/checkpoints/checkpoint-395/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f74fe0e74a690a0065ca3e2ed322fd49d3c2f687882ca40c11f47f05d8ab0f7 +size 43813 diff --git a/checkpoints/checkpoint-395/rng_state.pth b/checkpoints/checkpoint-395/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-395/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-395/scheduler.pt b/checkpoints/checkpoint-395/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e086a743885a903ad0d44182ff31c9e1921c661a --- /dev/null +++ b/checkpoints/checkpoint-395/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36364582546893069cab50807f8e2f10727a3f0f498662824fb4016a0e32a21 +size 1465 diff --git a/checkpoints/checkpoint-395/special_tokens_map.json b/checkpoints/checkpoint-395/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-395/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-395/tokenizer.json b/checkpoints/checkpoint-395/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-395/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-395/tokenizer_config.json b/checkpoints/checkpoint-395/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-395/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-395/trainer_state.json b/checkpoints/checkpoint-395/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cc9d9c74334c345b397e1aaab59efeea05bed1f5 --- /dev/null +++ b/checkpoints/checkpoint-395/trainer_state.json @@ -0,0 +1,2823 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9962168978562421, + "eval_steps": 100, + "global_step": 395, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 58.08714294433594, + "learning_rate": 1.122448979591837e-06, + "loss": 1.5748, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 58.478763580322266, + "learning_rate": 1.0714285714285714e-06, + "loss": 1.8731, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 50.9328498840332, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.4714, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 63.404449462890625, + "learning_rate": 9.69387755102041e-07, + "loss": 1.6991, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 95.09234619140625, + "learning_rate": 9.183673469387756e-07, + "loss": 1.6495, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 57.21084213256836, + "learning_rate": 8.673469387755103e-07, + "loss": 1.6225, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 47.711647033691406, + "learning_rate": 8.163265306122449e-07, + "loss": 1.56, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 42.815582275390625, + "learning_rate": 7.653061224489796e-07, + "loss": 1.8021, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 50.893619537353516, + "learning_rate": 7.142857142857143e-07, + "loss": 1.5707, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 68.56468963623047, + "learning_rate": 6.632653061224491e-07, + "loss": 1.6402, + "step": 385 + }, + { + "epoch": 0.9735182849936949, + "grad_norm": 71.2961654663086, + "learning_rate": 6.122448979591837e-07, + "loss": 1.7148, + "step": 386 + }, + { + "epoch": 0.9760403530895334, + "grad_norm": 64.18102264404297, + "learning_rate": 5.612244897959184e-07, + "loss": 1.5629, + "step": 387 + }, + { + "epoch": 0.978562421185372, + "grad_norm": 58.4742431640625, + "learning_rate": 5.102040816326531e-07, + "loss": 1.5886, + "step": 388 + }, + { + "epoch": 0.9810844892812106, + "grad_norm": 45.84322738647461, + "learning_rate": 4.591836734693878e-07, + "loss": 1.6636, + "step": 389 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 57.07891845703125, + "learning_rate": 4.0816326530612243e-07, + "loss": 1.6842, + "step": 390 + }, + { + "epoch": 0.9861286254728878, + "grad_norm": 45.1109619140625, + "learning_rate": 3.5714285714285716e-07, + "loss": 1.5951, + "step": 391 + }, + { + "epoch": 0.9886506935687264, + "grad_norm": 54.710269927978516, + "learning_rate": 3.0612244897959183e-07, + "loss": 1.5872, + "step": 392 + }, + { + "epoch": 0.9911727616645649, + "grad_norm": 46.05344772338867, + "learning_rate": 2.5510204081632656e-07, + "loss": 1.6574, + "step": 393 + }, + { + "epoch": 0.9936948297604036, + "grad_norm": 61.935646057128906, + "learning_rate": 2.0408163265306121e-07, + "loss": 1.6483, + "step": 394 + }, + { + "epoch": 0.9962168978562421, + "grad_norm": 78.09939575195312, + "learning_rate": 1.5306122448979592e-07, + "loss": 1.6685, + "step": 395 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.45255065660457e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-395/training_args.bin b/checkpoints/checkpoint-395/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-395/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-397/README.md b/checkpoints/checkpoint-397/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-397/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-397/adapter_config.json b/checkpoints/checkpoint-397/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-397/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-397/adapter_model.safetensors b/checkpoints/checkpoint-397/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f61cc47ce3c05b6efea2ce89b199dc0a5cff9aa --- /dev/null +++ b/checkpoints/checkpoint-397/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23aa3581cae20d89bd01cf7c5cbb8d532c318b9b1044bddb7f14ca77fec9b409 +size 74016 diff --git a/checkpoints/checkpoint-397/chat_template.jinja b/checkpoints/checkpoint-397/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-397/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-397/optimizer.pt b/checkpoints/checkpoint-397/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..075011e9f0d6ed4883f550b28ad0928b913b51a5 --- /dev/null +++ b/checkpoints/checkpoint-397/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4907bf946d8728b4a61a1cc69e831b5feef39b448dc8f6c9fa2e9b5254487c6 +size 43813 diff --git a/checkpoints/checkpoint-397/rng_state.pth b/checkpoints/checkpoint-397/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9d202f3cabb995444284e316dca48902b67f89a --- /dev/null +++ b/checkpoints/checkpoint-397/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f646d2c44059721e44eb93fd2af0841401f183d6ff9b2e067649f1f58cce0f09 +size 14581 diff --git a/checkpoints/checkpoint-397/scheduler.pt b/checkpoints/checkpoint-397/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..102e657d31650051a83d1d7ea3b668972446c16c --- /dev/null +++ b/checkpoints/checkpoint-397/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4500448392b99ca07eb31aa81080ff74976927f1ea697ed84792e9303ad6078 +size 1465 diff --git a/checkpoints/checkpoint-397/special_tokens_map.json b/checkpoints/checkpoint-397/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-397/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-397/tokenizer.json b/checkpoints/checkpoint-397/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-397/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-397/tokenizer_config.json b/checkpoints/checkpoint-397/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-397/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-397/trainer_state.json b/checkpoints/checkpoint-397/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e126c038d34293c316daf0330c612c0aac9718a --- /dev/null +++ b/checkpoints/checkpoint-397/trainer_state.json @@ -0,0 +1,2837 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 397, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + }, + { + "epoch": 0.2421185372005044, + "grad_norm": 65.5256576538086, + "learning_rate": 1.5408163265306123e-05, + "loss": 1.8377, + "step": 96 + }, + { + "epoch": 0.244640605296343, + "grad_norm": 77.13286590576172, + "learning_rate": 1.535714285714286e-05, + "loss": 1.7554, + "step": 97 + }, + { + "epoch": 0.2471626733921816, + "grad_norm": 65.81900787353516, + "learning_rate": 1.530612244897959e-05, + "loss": 1.6929, + "step": 98 + }, + { + "epoch": 0.24968474148802017, + "grad_norm": 39.05877685546875, + "learning_rate": 1.5255102040816327e-05, + "loss": 1.6753, + "step": 99 + }, + { + "epoch": 0.25220680958385877, + "grad_norm": 41.841400146484375, + "learning_rate": 1.5204081632653063e-05, + "loss": 1.6654, + "step": 100 + }, + { + "epoch": 0.25220680958385877, + "eval_loss": 1.7058324813842773, + "eval_runtime": 17.3725, + "eval_samples_per_second": 40.581, + "eval_steps_per_second": 20.319, + "step": 100 + }, + { + "epoch": 0.2547288776796974, + "grad_norm": 55.76837921142578, + "learning_rate": 1.5153061224489798e-05, + "loss": 1.683, + "step": 101 + }, + { + "epoch": 0.2572509457755359, + "grad_norm": 62.13895034790039, + "learning_rate": 1.510204081632653e-05, + "loss": 1.8519, + "step": 102 + }, + { + "epoch": 0.2597730138713745, + "grad_norm": 45.138404846191406, + "learning_rate": 1.5051020408163266e-05, + "loss": 1.7359, + "step": 103 + }, + { + "epoch": 0.26229508196721313, + "grad_norm": 38.36245346069336, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.5741, + "step": 104 + }, + { + "epoch": 0.2648171500630517, + "grad_norm": 61.96607208251953, + "learning_rate": 1.4948979591836736e-05, + "loss": 1.7047, + "step": 105 + }, + { + "epoch": 0.2673392181588903, + "grad_norm": 42.66326904296875, + "learning_rate": 1.4897959183673472e-05, + "loss": 1.6777, + "step": 106 + }, + { + "epoch": 0.2698612862547289, + "grad_norm": 48.25875473022461, + "learning_rate": 1.4846938775510204e-05, + "loss": 1.7601, + "step": 107 + }, + { + "epoch": 0.2723833543505675, + "grad_norm": 36.98008728027344, + "learning_rate": 1.479591836734694e-05, + "loss": 1.7191, + "step": 108 + }, + { + "epoch": 0.27490542244640603, + "grad_norm": 35.65924072265625, + "learning_rate": 1.4744897959183676e-05, + "loss": 1.7149, + "step": 109 + }, + { + "epoch": 0.27742749054224464, + "grad_norm": 42.08740234375, + "learning_rate": 1.469387755102041e-05, + "loss": 1.7385, + "step": 110 + }, + { + "epoch": 0.27994955863808324, + "grad_norm": 51.34683609008789, + "learning_rate": 1.4642857142857144e-05, + "loss": 1.7557, + "step": 111 + }, + { + "epoch": 0.28247162673392184, + "grad_norm": 32.70396423339844, + "learning_rate": 1.4591836734693878e-05, + "loss": 1.7553, + "step": 112 + }, + { + "epoch": 0.2849936948297604, + "grad_norm": 46.8604736328125, + "learning_rate": 1.4540816326530614e-05, + "loss": 1.6614, + "step": 113 + }, + { + "epoch": 0.287515762925599, + "grad_norm": 47.428794860839844, + "learning_rate": 1.448979591836735e-05, + "loss": 1.6713, + "step": 114 + }, + { + "epoch": 0.2900378310214376, + "grad_norm": 63.776058197021484, + "learning_rate": 1.4438775510204083e-05, + "loss": 1.4853, + "step": 115 + }, + { + "epoch": 0.29255989911727615, + "grad_norm": 68.13321685791016, + "learning_rate": 1.4387755102040817e-05, + "loss": 1.6409, + "step": 116 + }, + { + "epoch": 0.29508196721311475, + "grad_norm": 30.4958553314209, + "learning_rate": 1.4336734693877551e-05, + "loss": 1.7011, + "step": 117 + }, + { + "epoch": 0.29760403530895335, + "grad_norm": 40.45919418334961, + "learning_rate": 1.4285714285714287e-05, + "loss": 1.7823, + "step": 118 + }, + { + "epoch": 0.30012610340479196, + "grad_norm": 91.41016387939453, + "learning_rate": 1.4234693877551023e-05, + "loss": 1.7417, + "step": 119 + }, + { + "epoch": 0.3026481715006305, + "grad_norm": 52.76211929321289, + "learning_rate": 1.4183673469387755e-05, + "loss": 1.5838, + "step": 120 + }, + { + "epoch": 0.3051702395964691, + "grad_norm": 35.29008483886719, + "learning_rate": 1.4132653061224491e-05, + "loss": 1.6948, + "step": 121 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 39.410675048828125, + "learning_rate": 1.4081632653061225e-05, + "loss": 1.7751, + "step": 122 + }, + { + "epoch": 0.31021437578814626, + "grad_norm": 43.52021026611328, + "learning_rate": 1.403061224489796e-05, + "loss": 1.6385, + "step": 123 + }, + { + "epoch": 0.31273644388398486, + "grad_norm": 48.39508819580078, + "learning_rate": 1.3979591836734696e-05, + "loss": 1.6298, + "step": 124 + }, + { + "epoch": 0.31525851197982346, + "grad_norm": 41.7908821105957, + "learning_rate": 1.3928571428571429e-05, + "loss": 1.7272, + "step": 125 + }, + { + "epoch": 0.31778058007566207, + "grad_norm": 52.86972427368164, + "learning_rate": 1.3877551020408165e-05, + "loss": 1.8452, + "step": 126 + }, + { + "epoch": 0.3203026481715006, + "grad_norm": 53.07705307006836, + "learning_rate": 1.38265306122449e-05, + "loss": 1.7903, + "step": 127 + }, + { + "epoch": 0.3228247162673392, + "grad_norm": 46.49085998535156, + "learning_rate": 1.3775510204081634e-05, + "loss": 1.5486, + "step": 128 + }, + { + "epoch": 0.3253467843631778, + "grad_norm": 43.22178649902344, + "learning_rate": 1.3724489795918368e-05, + "loss": 1.735, + "step": 129 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 62.47540283203125, + "learning_rate": 1.3673469387755102e-05, + "loss": 1.7223, + "step": 130 + }, + { + "epoch": 0.33039092055485497, + "grad_norm": 53.00700759887695, + "learning_rate": 1.3622448979591838e-05, + "loss": 1.6867, + "step": 131 + }, + { + "epoch": 0.3329129886506936, + "grad_norm": 44.43991470336914, + "learning_rate": 1.3571428571428574e-05, + "loss": 1.8764, + "step": 132 + }, + { + "epoch": 0.3354350567465322, + "grad_norm": 39.670066833496094, + "learning_rate": 1.3520408163265306e-05, + "loss": 1.636, + "step": 133 + }, + { + "epoch": 0.3379571248423707, + "grad_norm": 47.06390380859375, + "learning_rate": 1.3469387755102042e-05, + "loss": 1.6492, + "step": 134 + }, + { + "epoch": 0.34047919293820933, + "grad_norm": 62.545841217041016, + "learning_rate": 1.3418367346938776e-05, + "loss": 1.6861, + "step": 135 + }, + { + "epoch": 0.34300126103404793, + "grad_norm": 54.30208969116211, + "learning_rate": 1.3367346938775512e-05, + "loss": 1.6878, + "step": 136 + }, + { + "epoch": 0.3455233291298865, + "grad_norm": 42.69244384765625, + "learning_rate": 1.3316326530612247e-05, + "loss": 1.6575, + "step": 137 + }, + { + "epoch": 0.3480453972257251, + "grad_norm": 48.067604064941406, + "learning_rate": 1.326530612244898e-05, + "loss": 1.6622, + "step": 138 + }, + { + "epoch": 0.3505674653215637, + "grad_norm": 70.65290832519531, + "learning_rate": 1.3214285714285716e-05, + "loss": 1.8153, + "step": 139 + }, + { + "epoch": 0.3530895334174023, + "grad_norm": 83.261474609375, + "learning_rate": 1.316326530612245e-05, + "loss": 1.7836, + "step": 140 + }, + { + "epoch": 0.35561160151324084, + "grad_norm": 88.9097900390625, + "learning_rate": 1.3112244897959185e-05, + "loss": 1.5952, + "step": 141 + }, + { + "epoch": 0.35813366960907944, + "grad_norm": 62.275699615478516, + "learning_rate": 1.3061224489795918e-05, + "loss": 1.5893, + "step": 142 + }, + { + "epoch": 0.36065573770491804, + "grad_norm": 46.35886764526367, + "learning_rate": 1.3010204081632653e-05, + "loss": 1.7599, + "step": 143 + }, + { + "epoch": 0.36317780580075665, + "grad_norm": 52.75099563598633, + "learning_rate": 1.2959183673469389e-05, + "loss": 1.6112, + "step": 144 + }, + { + "epoch": 0.3656998738965952, + "grad_norm": 56.87018966674805, + "learning_rate": 1.2908163265306123e-05, + "loss": 1.6707, + "step": 145 + }, + { + "epoch": 0.3682219419924338, + "grad_norm": 101.14485931396484, + "learning_rate": 1.2857142857142859e-05, + "loss": 1.7671, + "step": 146 + }, + { + "epoch": 0.3707440100882724, + "grad_norm": 59.226531982421875, + "learning_rate": 1.2806122448979591e-05, + "loss": 1.6923, + "step": 147 + }, + { + "epoch": 0.37326607818411095, + "grad_norm": 74.40206909179688, + "learning_rate": 1.2755102040816327e-05, + "loss": 1.7094, + "step": 148 + }, + { + "epoch": 0.37578814627994955, + "grad_norm": 62.53060531616211, + "learning_rate": 1.2704081632653063e-05, + "loss": 1.6542, + "step": 149 + }, + { + "epoch": 0.37831021437578816, + "grad_norm": 38.56242370605469, + "learning_rate": 1.2653061224489798e-05, + "loss": 1.6094, + "step": 150 + }, + { + "epoch": 0.38083228247162676, + "grad_norm": 72.4624252319336, + "learning_rate": 1.260204081632653e-05, + "loss": 1.5669, + "step": 151 + }, + { + "epoch": 0.3833543505674653, + "grad_norm": 55.217411041259766, + "learning_rate": 1.2551020408163267e-05, + "loss": 1.7986, + "step": 152 + }, + { + "epoch": 0.3858764186633039, + "grad_norm": 62.509765625, + "learning_rate": 1.25e-05, + "loss": 1.5193, + "step": 153 + }, + { + "epoch": 0.3883984867591425, + "grad_norm": 57.38125228881836, + "learning_rate": 1.2448979591836736e-05, + "loss": 1.7127, + "step": 154 + }, + { + "epoch": 0.39092055485498106, + "grad_norm": 55.615352630615234, + "learning_rate": 1.2397959183673472e-05, + "loss": 1.737, + "step": 155 + }, + { + "epoch": 0.39344262295081966, + "grad_norm": 39.61804962158203, + "learning_rate": 1.2346938775510204e-05, + "loss": 1.6305, + "step": 156 + }, + { + "epoch": 0.39596469104665827, + "grad_norm": 47.65972137451172, + "learning_rate": 1.229591836734694e-05, + "loss": 1.8287, + "step": 157 + }, + { + "epoch": 0.39848675914249687, + "grad_norm": 71.03924560546875, + "learning_rate": 1.2244897959183674e-05, + "loss": 1.6611, + "step": 158 + }, + { + "epoch": 0.4010088272383354, + "grad_norm": 39.84426498413086, + "learning_rate": 1.219387755102041e-05, + "loss": 1.5782, + "step": 159 + }, + { + "epoch": 0.403530895334174, + "grad_norm": 65.24462127685547, + "learning_rate": 1.2142857142857142e-05, + "loss": 1.7443, + "step": 160 + }, + { + "epoch": 0.4060529634300126, + "grad_norm": 53.24254608154297, + "learning_rate": 1.2091836734693878e-05, + "loss": 1.7684, + "step": 161 + }, + { + "epoch": 0.4085750315258512, + "grad_norm": 68.38136291503906, + "learning_rate": 1.2040816326530614e-05, + "loss": 1.7266, + "step": 162 + }, + { + "epoch": 0.4110970996216898, + "grad_norm": 50.510318756103516, + "learning_rate": 1.1989795918367348e-05, + "loss": 1.7898, + "step": 163 + }, + { + "epoch": 0.4136191677175284, + "grad_norm": 52.10852813720703, + "learning_rate": 1.1938775510204084e-05, + "loss": 1.6409, + "step": 164 + }, + { + "epoch": 0.416141235813367, + "grad_norm": 47.451011657714844, + "learning_rate": 1.1887755102040816e-05, + "loss": 1.7012, + "step": 165 + }, + { + "epoch": 0.41866330390920553, + "grad_norm": 72.23477935791016, + "learning_rate": 1.1836734693877552e-05, + "loss": 1.6302, + "step": 166 + }, + { + "epoch": 0.42118537200504413, + "grad_norm": 52.041316986083984, + "learning_rate": 1.1785714285714287e-05, + "loss": 1.6227, + "step": 167 + }, + { + "epoch": 0.42370744010088274, + "grad_norm": 55.48360824584961, + "learning_rate": 1.1734693877551021e-05, + "loss": 1.6287, + "step": 168 + }, + { + "epoch": 0.4262295081967213, + "grad_norm": 58.351219177246094, + "learning_rate": 1.1683673469387755e-05, + "loss": 1.7909, + "step": 169 + }, + { + "epoch": 0.4287515762925599, + "grad_norm": 44.11323165893555, + "learning_rate": 1.1632653061224491e-05, + "loss": 1.6937, + "step": 170 + }, + { + "epoch": 0.4312736443883985, + "grad_norm": 53.49913024902344, + "learning_rate": 1.1581632653061225e-05, + "loss": 1.5843, + "step": 171 + }, + { + "epoch": 0.4337957124842371, + "grad_norm": 43.77488708496094, + "learning_rate": 1.1530612244897961e-05, + "loss": 1.6643, + "step": 172 + }, + { + "epoch": 0.43631778058007564, + "grad_norm": 64.34185028076172, + "learning_rate": 1.1479591836734697e-05, + "loss": 1.8263, + "step": 173 + }, + { + "epoch": 0.43883984867591425, + "grad_norm": 59.59367752075195, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.5969, + "step": 174 + }, + { + "epoch": 0.44136191677175285, + "grad_norm": 42.75151062011719, + "learning_rate": 1.1377551020408165e-05, + "loss": 1.6321, + "step": 175 + }, + { + "epoch": 0.44388398486759145, + "grad_norm": 61.3978157043457, + "learning_rate": 1.1326530612244899e-05, + "loss": 1.7589, + "step": 176 + }, + { + "epoch": 0.44640605296343, + "grad_norm": 45.83797073364258, + "learning_rate": 1.1275510204081635e-05, + "loss": 1.6514, + "step": 177 + }, + { + "epoch": 0.4489281210592686, + "grad_norm": 52.09745788574219, + "learning_rate": 1.1224489795918367e-05, + "loss": 1.5945, + "step": 178 + }, + { + "epoch": 0.4514501891551072, + "grad_norm": 48.397891998291016, + "learning_rate": 1.1173469387755103e-05, + "loss": 1.5852, + "step": 179 + }, + { + "epoch": 0.45397225725094575, + "grad_norm": 62.47504425048828, + "learning_rate": 1.1122448979591838e-05, + "loss": 1.873, + "step": 180 + }, + { + "epoch": 0.45649432534678436, + "grad_norm": 81.45391082763672, + "learning_rate": 1.1071428571428572e-05, + "loss": 1.7931, + "step": 181 + }, + { + "epoch": 0.45901639344262296, + "grad_norm": 45.61745834350586, + "learning_rate": 1.1020408163265306e-05, + "loss": 1.6525, + "step": 182 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 43.88615798950195, + "learning_rate": 1.096938775510204e-05, + "loss": 1.6817, + "step": 183 + }, + { + "epoch": 0.4640605296343001, + "grad_norm": 57.98750686645508, + "learning_rate": 1.0918367346938776e-05, + "loss": 1.6739, + "step": 184 + }, + { + "epoch": 0.4665825977301387, + "grad_norm": 62.238956451416016, + "learning_rate": 1.0867346938775512e-05, + "loss": 1.5318, + "step": 185 + }, + { + "epoch": 0.4691046658259773, + "grad_norm": 44.14122772216797, + "learning_rate": 1.0816326530612246e-05, + "loss": 1.6098, + "step": 186 + }, + { + "epoch": 0.47162673392181587, + "grad_norm": 67.29253387451172, + "learning_rate": 1.076530612244898e-05, + "loss": 1.6838, + "step": 187 + }, + { + "epoch": 0.47414880201765447, + "grad_norm": 73.57019805908203, + "learning_rate": 1.0714285714285714e-05, + "loss": 1.6678, + "step": 188 + }, + { + "epoch": 0.4766708701134931, + "grad_norm": 49.36915969848633, + "learning_rate": 1.066326530612245e-05, + "loss": 1.5204, + "step": 189 + }, + { + "epoch": 0.4791929382093317, + "grad_norm": 49.2264518737793, + "learning_rate": 1.0612244897959186e-05, + "loss": 1.5933, + "step": 190 + }, + { + "epoch": 0.4817150063051702, + "grad_norm": 57.03235626220703, + "learning_rate": 1.0561224489795918e-05, + "loss": 1.763, + "step": 191 + }, + { + "epoch": 0.4842370744010088, + "grad_norm": 58.081783294677734, + "learning_rate": 1.0510204081632654e-05, + "loss": 1.7214, + "step": 192 + }, + { + "epoch": 0.48675914249684743, + "grad_norm": 38.1114501953125, + "learning_rate": 1.045918367346939e-05, + "loss": 1.4882, + "step": 193 + }, + { + "epoch": 0.489281210592686, + "grad_norm": 78.7602767944336, + "learning_rate": 1.0408163265306123e-05, + "loss": 1.6893, + "step": 194 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 42.24647903442383, + "learning_rate": 1.0357142857142859e-05, + "loss": 1.8366, + "step": 195 + }, + { + "epoch": 0.4943253467843632, + "grad_norm": 74.10948944091797, + "learning_rate": 1.0306122448979591e-05, + "loss": 1.8239, + "step": 196 + }, + { + "epoch": 0.4968474148802018, + "grad_norm": 53.59373474121094, + "learning_rate": 1.0255102040816327e-05, + "loss": 1.6588, + "step": 197 + }, + { + "epoch": 0.49936948297604034, + "grad_norm": 49.268436431884766, + "learning_rate": 1.0204081632653063e-05, + "loss": 1.5371, + "step": 198 + }, + { + "epoch": 0.501891551071879, + "grad_norm": 64.90384674072266, + "learning_rate": 1.0153061224489797e-05, + "loss": 1.6578, + "step": 199 + }, + { + "epoch": 0.5044136191677175, + "grad_norm": 73.85359954833984, + "learning_rate": 1.0102040816326531e-05, + "loss": 1.6869, + "step": 200 + }, + { + "epoch": 0.5044136191677175, + "eval_loss": 1.624376654624939, + "eval_runtime": 17.2241, + "eval_samples_per_second": 40.931, + "eval_steps_per_second": 20.495, + "step": 200 + }, + { + "epoch": 0.5069356872635561, + "grad_norm": 58.34244155883789, + "learning_rate": 1.0051020408163265e-05, + "loss": 1.5548, + "step": 201 + }, + { + "epoch": 0.5094577553593947, + "grad_norm": 58.29341506958008, + "learning_rate": 1e-05, + "loss": 1.784, + "step": 202 + }, + { + "epoch": 0.5119798234552333, + "grad_norm": 59.047447204589844, + "learning_rate": 9.948979591836737e-06, + "loss": 1.6739, + "step": 203 + }, + { + "epoch": 0.5145018915510718, + "grad_norm": 60.774898529052734, + "learning_rate": 9.89795918367347e-06, + "loss": 1.7087, + "step": 204 + }, + { + "epoch": 0.5170239596469105, + "grad_norm": 63.295013427734375, + "learning_rate": 9.846938775510205e-06, + "loss": 1.706, + "step": 205 + }, + { + "epoch": 0.519546027742749, + "grad_norm": 66.50640869140625, + "learning_rate": 9.795918367346939e-06, + "loss": 1.8127, + "step": 206 + }, + { + "epoch": 0.5220680958385876, + "grad_norm": 49.714107513427734, + "learning_rate": 9.744897959183674e-06, + "loss": 1.6251, + "step": 207 + }, + { + "epoch": 0.5245901639344263, + "grad_norm": 62.14101791381836, + "learning_rate": 9.693877551020408e-06, + "loss": 1.5424, + "step": 208 + }, + { + "epoch": 0.5271122320302648, + "grad_norm": 69.4269027709961, + "learning_rate": 9.642857142857144e-06, + "loss": 1.5577, + "step": 209 + }, + { + "epoch": 0.5296343001261034, + "grad_norm": 116.79376983642578, + "learning_rate": 9.591836734693878e-06, + "loss": 1.7376, + "step": 210 + }, + { + "epoch": 0.532156368221942, + "grad_norm": 72.32835388183594, + "learning_rate": 9.540816326530612e-06, + "loss": 1.6292, + "step": 211 + }, + { + "epoch": 0.5346784363177806, + "grad_norm": 68.02676391601562, + "learning_rate": 9.489795918367348e-06, + "loss": 1.756, + "step": 212 + }, + { + "epoch": 0.5372005044136192, + "grad_norm": 41.7132682800293, + "learning_rate": 9.438775510204082e-06, + "loss": 1.6576, + "step": 213 + }, + { + "epoch": 0.5397225725094578, + "grad_norm": 55.55889129638672, + "learning_rate": 9.387755102040818e-06, + "loss": 1.7124, + "step": 214 + }, + { + "epoch": 0.5422446406052963, + "grad_norm": 63.634586334228516, + "learning_rate": 9.336734693877552e-06, + "loss": 1.7123, + "step": 215 + }, + { + "epoch": 0.544766708701135, + "grad_norm": 81.96355438232422, + "learning_rate": 9.285714285714288e-06, + "loss": 1.6823, + "step": 216 + }, + { + "epoch": 0.5472887767969735, + "grad_norm": 74.68587493896484, + "learning_rate": 9.234693877551022e-06, + "loss": 1.7233, + "step": 217 + }, + { + "epoch": 0.5498108448928121, + "grad_norm": 57.257789611816406, + "learning_rate": 9.183673469387756e-06, + "loss": 1.6514, + "step": 218 + }, + { + "epoch": 0.5523329129886507, + "grad_norm": 48.31911849975586, + "learning_rate": 9.13265306122449e-06, + "loss": 1.7837, + "step": 219 + }, + { + "epoch": 0.5548549810844893, + "grad_norm": 52.49345016479492, + "learning_rate": 9.081632653061225e-06, + "loss": 1.6013, + "step": 220 + }, + { + "epoch": 0.5573770491803278, + "grad_norm": 60.87263870239258, + "learning_rate": 9.03061224489796e-06, + "loss": 1.677, + "step": 221 + }, + { + "epoch": 0.5598991172761665, + "grad_norm": 76.93538665771484, + "learning_rate": 8.979591836734695e-06, + "loss": 1.7444, + "step": 222 + }, + { + "epoch": 0.562421185372005, + "grad_norm": 49.461509704589844, + "learning_rate": 8.92857142857143e-06, + "loss": 1.4863, + "step": 223 + }, + { + "epoch": 0.5649432534678437, + "grad_norm": 43.88584899902344, + "learning_rate": 8.877551020408163e-06, + "loss": 1.708, + "step": 224 + }, + { + "epoch": 0.5674653215636822, + "grad_norm": 50.26557922363281, + "learning_rate": 8.826530612244899e-06, + "loss": 1.5033, + "step": 225 + }, + { + "epoch": 0.5699873896595208, + "grad_norm": 52.14901351928711, + "learning_rate": 8.775510204081633e-06, + "loss": 1.713, + "step": 226 + }, + { + "epoch": 0.5725094577553594, + "grad_norm": 38.479000091552734, + "learning_rate": 8.724489795918369e-06, + "loss": 1.7115, + "step": 227 + }, + { + "epoch": 0.575031525851198, + "grad_norm": 45.17754364013672, + "learning_rate": 8.673469387755103e-06, + "loss": 1.5331, + "step": 228 + }, + { + "epoch": 0.5775535939470365, + "grad_norm": 59.97049331665039, + "learning_rate": 8.622448979591837e-06, + "loss": 1.6301, + "step": 229 + }, + { + "epoch": 0.5800756620428752, + "grad_norm": 63.48531723022461, + "learning_rate": 8.571428571428571e-06, + "loss": 1.7224, + "step": 230 + }, + { + "epoch": 0.5825977301387137, + "grad_norm": 47.30621337890625, + "learning_rate": 8.520408163265307e-06, + "loss": 1.6898, + "step": 231 + }, + { + "epoch": 0.5851197982345523, + "grad_norm": 68.28963470458984, + "learning_rate": 8.469387755102042e-06, + "loss": 1.7713, + "step": 232 + }, + { + "epoch": 0.587641866330391, + "grad_norm": 48.91388702392578, + "learning_rate": 8.418367346938776e-06, + "loss": 1.6196, + "step": 233 + }, + { + "epoch": 0.5901639344262295, + "grad_norm": 69.12262725830078, + "learning_rate": 8.36734693877551e-06, + "loss": 1.5595, + "step": 234 + }, + { + "epoch": 0.592686002522068, + "grad_norm": 64.75733947753906, + "learning_rate": 8.316326530612246e-06, + "loss": 1.5329, + "step": 235 + }, + { + "epoch": 0.5952080706179067, + "grad_norm": 65.38609313964844, + "learning_rate": 8.26530612244898e-06, + "loss": 1.4807, + "step": 236 + }, + { + "epoch": 0.5977301387137453, + "grad_norm": 54.65237808227539, + "learning_rate": 8.214285714285714e-06, + "loss": 1.7062, + "step": 237 + }, + { + "epoch": 0.6002522068095839, + "grad_norm": 49.22687530517578, + "learning_rate": 8.16326530612245e-06, + "loss": 1.5107, + "step": 238 + }, + { + "epoch": 0.6027742749054225, + "grad_norm": 61.179691314697266, + "learning_rate": 8.112244897959184e-06, + "loss": 1.6022, + "step": 239 + }, + { + "epoch": 0.605296343001261, + "grad_norm": 54.46000671386719, + "learning_rate": 8.06122448979592e-06, + "loss": 1.6054, + "step": 240 + }, + { + "epoch": 0.6078184110970997, + "grad_norm": 50.293243408203125, + "learning_rate": 8.010204081632654e-06, + "loss": 1.657, + "step": 241 + }, + { + "epoch": 0.6103404791929382, + "grad_norm": 68.89249420166016, + "learning_rate": 7.959183673469388e-06, + "loss": 1.661, + "step": 242 + }, + { + "epoch": 0.6128625472887768, + "grad_norm": 61.56959915161133, + "learning_rate": 7.908163265306124e-06, + "loss": 1.4934, + "step": 243 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 57.62371063232422, + "learning_rate": 7.857142857142858e-06, + "loss": 1.6396, + "step": 244 + }, + { + "epoch": 0.617906683480454, + "grad_norm": 55.0053596496582, + "learning_rate": 7.806122448979593e-06, + "loss": 1.7401, + "step": 245 + }, + { + "epoch": 0.6204287515762925, + "grad_norm": 42.153709411621094, + "learning_rate": 7.755102040816327e-06, + "loss": 1.5954, + "step": 246 + }, + { + "epoch": 0.6229508196721312, + "grad_norm": 36.70948028564453, + "learning_rate": 7.704081632653061e-06, + "loss": 1.6299, + "step": 247 + }, + { + "epoch": 0.6254728877679697, + "grad_norm": 57.706451416015625, + "learning_rate": 7.653061224489796e-06, + "loss": 1.6684, + "step": 248 + }, + { + "epoch": 0.6279949558638083, + "grad_norm": 48.05001449584961, + "learning_rate": 7.602040816326531e-06, + "loss": 1.7143, + "step": 249 + }, + { + "epoch": 0.6305170239596469, + "grad_norm": 56.3585090637207, + "learning_rate": 7.551020408163265e-06, + "loss": 1.6261, + "step": 250 + }, + { + "epoch": 0.6330390920554855, + "grad_norm": 45.11580276489258, + "learning_rate": 7.500000000000001e-06, + "loss": 1.6013, + "step": 251 + }, + { + "epoch": 0.6355611601513241, + "grad_norm": 48.9489860534668, + "learning_rate": 7.448979591836736e-06, + "loss": 1.5591, + "step": 252 + }, + { + "epoch": 0.6380832282471627, + "grad_norm": 54.34056854248047, + "learning_rate": 7.39795918367347e-06, + "loss": 1.6868, + "step": 253 + }, + { + "epoch": 0.6406052963430012, + "grad_norm": 64.83470153808594, + "learning_rate": 7.346938775510205e-06, + "loss": 1.6854, + "step": 254 + }, + { + "epoch": 0.6431273644388399, + "grad_norm": 38.010223388671875, + "learning_rate": 7.295918367346939e-06, + "loss": 1.6316, + "step": 255 + }, + { + "epoch": 0.6456494325346784, + "grad_norm": 84.18378448486328, + "learning_rate": 7.244897959183675e-06, + "loss": 1.6047, + "step": 256 + }, + { + "epoch": 0.648171500630517, + "grad_norm": 77.3735580444336, + "learning_rate": 7.193877551020409e-06, + "loss": 1.6184, + "step": 257 + }, + { + "epoch": 0.6506935687263556, + "grad_norm": 46.59174728393555, + "learning_rate": 7.1428571428571436e-06, + "loss": 1.5215, + "step": 258 + }, + { + "epoch": 0.6532156368221942, + "grad_norm": 40.80696487426758, + "learning_rate": 7.091836734693878e-06, + "loss": 1.7838, + "step": 259 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 65.00389862060547, + "learning_rate": 7.0408163265306125e-06, + "loss": 1.6476, + "step": 260 + }, + { + "epoch": 0.6582597730138714, + "grad_norm": 52.06635665893555, + "learning_rate": 6.989795918367348e-06, + "loss": 1.556, + "step": 261 + }, + { + "epoch": 0.6607818411097099, + "grad_norm": 46.52763366699219, + "learning_rate": 6.938775510204082e-06, + "loss": 1.6171, + "step": 262 + }, + { + "epoch": 0.6633039092055486, + "grad_norm": 49.21574020385742, + "learning_rate": 6.887755102040817e-06, + "loss": 1.6289, + "step": 263 + }, + { + "epoch": 0.6658259773013872, + "grad_norm": 43.80045700073242, + "learning_rate": 6.836734693877551e-06, + "loss": 1.6164, + "step": 264 + }, + { + "epoch": 0.6683480453972257, + "grad_norm": 38.01715850830078, + "learning_rate": 6.785714285714287e-06, + "loss": 1.5516, + "step": 265 + }, + { + "epoch": 0.6708701134930644, + "grad_norm": 68.9593505859375, + "learning_rate": 6.734693877551021e-06, + "loss": 1.6217, + "step": 266 + }, + { + "epoch": 0.6733921815889029, + "grad_norm": 53.72611999511719, + "learning_rate": 6.683673469387756e-06, + "loss": 1.6378, + "step": 267 + }, + { + "epoch": 0.6759142496847415, + "grad_norm": 32.71125030517578, + "learning_rate": 6.63265306122449e-06, + "loss": 1.7422, + "step": 268 + }, + { + "epoch": 0.6784363177805801, + "grad_norm": 50.81645965576172, + "learning_rate": 6.581632653061225e-06, + "loss": 1.5741, + "step": 269 + }, + { + "epoch": 0.6809583858764187, + "grad_norm": 59.44023513793945, + "learning_rate": 6.530612244897959e-06, + "loss": 1.5977, + "step": 270 + }, + { + "epoch": 0.6834804539722572, + "grad_norm": 43.35381317138672, + "learning_rate": 6.4795918367346946e-06, + "loss": 1.7446, + "step": 271 + }, + { + "epoch": 0.6860025220680959, + "grad_norm": 65.33541107177734, + "learning_rate": 6.4285714285714295e-06, + "loss": 1.5325, + "step": 272 + }, + { + "epoch": 0.6885245901639344, + "grad_norm": 55.88346481323242, + "learning_rate": 6.3775510204081635e-06, + "loss": 1.6958, + "step": 273 + }, + { + "epoch": 0.691046658259773, + "grad_norm": 48.47056579589844, + "learning_rate": 6.326530612244899e-06, + "loss": 1.7204, + "step": 274 + }, + { + "epoch": 0.6935687263556116, + "grad_norm": 55.698204040527344, + "learning_rate": 6.275510204081633e-06, + "loss": 1.7359, + "step": 275 + }, + { + "epoch": 0.6960907944514502, + "grad_norm": 52.59063720703125, + "learning_rate": 6.224489795918368e-06, + "loss": 1.6562, + "step": 276 + }, + { + "epoch": 0.6986128625472888, + "grad_norm": 72.51795959472656, + "learning_rate": 6.173469387755102e-06, + "loss": 1.7142, + "step": 277 + }, + { + "epoch": 0.7011349306431274, + "grad_norm": 68.86798095703125, + "learning_rate": 6.122448979591837e-06, + "loss": 1.6575, + "step": 278 + }, + { + "epoch": 0.7036569987389659, + "grad_norm": 78.17684173583984, + "learning_rate": 6.071428571428571e-06, + "loss": 1.6694, + "step": 279 + }, + { + "epoch": 0.7061790668348046, + "grad_norm": 60.10198974609375, + "learning_rate": 6.020408163265307e-06, + "loss": 1.6236, + "step": 280 + }, + { + "epoch": 0.7087011349306431, + "grad_norm": 51.694454193115234, + "learning_rate": 5.969387755102042e-06, + "loss": 1.71, + "step": 281 + }, + { + "epoch": 0.7112232030264817, + "grad_norm": 69.73673248291016, + "learning_rate": 5.918367346938776e-06, + "loss": 1.5107, + "step": 282 + }, + { + "epoch": 0.7137452711223203, + "grad_norm": 59.43289566040039, + "learning_rate": 5.867346938775511e-06, + "loss": 1.7001, + "step": 283 + }, + { + "epoch": 0.7162673392181589, + "grad_norm": 56.3236198425293, + "learning_rate": 5.816326530612246e-06, + "loss": 1.6852, + "step": 284 + }, + { + "epoch": 0.7187894073139974, + "grad_norm": 58.143680572509766, + "learning_rate": 5.7653061224489805e-06, + "loss": 1.6675, + "step": 285 + }, + { + "epoch": 0.7213114754098361, + "grad_norm": 70.73607635498047, + "learning_rate": 5.7142857142857145e-06, + "loss": 1.5949, + "step": 286 + }, + { + "epoch": 0.7238335435056746, + "grad_norm": 47.955745697021484, + "learning_rate": 5.663265306122449e-06, + "loss": 1.6456, + "step": 287 + }, + { + "epoch": 0.7263556116015133, + "grad_norm": 66.31688690185547, + "learning_rate": 5.6122448979591834e-06, + "loss": 1.724, + "step": 288 + }, + { + "epoch": 0.7288776796973518, + "grad_norm": 64.84517669677734, + "learning_rate": 5.561224489795919e-06, + "loss": 1.6266, + "step": 289 + }, + { + "epoch": 0.7313997477931904, + "grad_norm": 49.25682067871094, + "learning_rate": 5.510204081632653e-06, + "loss": 1.4352, + "step": 290 + }, + { + "epoch": 0.733921815889029, + "grad_norm": 77.06358337402344, + "learning_rate": 5.459183673469388e-06, + "loss": 1.5504, + "step": 291 + }, + { + "epoch": 0.7364438839848676, + "grad_norm": 59.66337203979492, + "learning_rate": 5.408163265306123e-06, + "loss": 1.5528, + "step": 292 + }, + { + "epoch": 0.7389659520807061, + "grad_norm": 63.75739288330078, + "learning_rate": 5.357142857142857e-06, + "loss": 1.6034, + "step": 293 + }, + { + "epoch": 0.7414880201765448, + "grad_norm": 80.54058074951172, + "learning_rate": 5.306122448979593e-06, + "loss": 1.6414, + "step": 294 + }, + { + "epoch": 0.7440100882723834, + "grad_norm": 40.65614700317383, + "learning_rate": 5.255102040816327e-06, + "loss": 1.6339, + "step": 295 + }, + { + "epoch": 0.7465321563682219, + "grad_norm": 50.28155517578125, + "learning_rate": 5.204081632653062e-06, + "loss": 1.6472, + "step": 296 + }, + { + "epoch": 0.7490542244640606, + "grad_norm": 52.285640716552734, + "learning_rate": 5.153061224489796e-06, + "loss": 1.6791, + "step": 297 + }, + { + "epoch": 0.7515762925598991, + "grad_norm": 58.316741943359375, + "learning_rate": 5.1020408163265315e-06, + "loss": 1.7848, + "step": 298 + }, + { + "epoch": 0.7540983606557377, + "grad_norm": 69.99073028564453, + "learning_rate": 5.0510204081632655e-06, + "loss": 1.6932, + "step": 299 + }, + { + "epoch": 0.7566204287515763, + "grad_norm": 50.15757751464844, + "learning_rate": 5e-06, + "loss": 1.5822, + "step": 300 + }, + { + "epoch": 0.7566204287515763, + "eval_loss": 1.6064321994781494, + "eval_runtime": 17.1267, + "eval_samples_per_second": 41.164, + "eval_steps_per_second": 20.611, + "step": 300 + }, + { + "epoch": 0.7591424968474149, + "grad_norm": 58.328125, + "learning_rate": 4.948979591836735e-06, + "loss": 1.5885, + "step": 301 + }, + { + "epoch": 0.7616645649432535, + "grad_norm": 49.442935943603516, + "learning_rate": 4.897959183673469e-06, + "loss": 1.5638, + "step": 302 + }, + { + "epoch": 0.7641866330390921, + "grad_norm": 43.516578674316406, + "learning_rate": 4.846938775510204e-06, + "loss": 1.5877, + "step": 303 + }, + { + "epoch": 0.7667087011349306, + "grad_norm": 52.609676361083984, + "learning_rate": 4.795918367346939e-06, + "loss": 1.5974, + "step": 304 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 63.037689208984375, + "learning_rate": 4.744897959183674e-06, + "loss": 1.6105, + "step": 305 + }, + { + "epoch": 0.7717528373266078, + "grad_norm": 85.72171783447266, + "learning_rate": 4.693877551020409e-06, + "loss": 1.5709, + "step": 306 + }, + { + "epoch": 0.7742749054224464, + "grad_norm": 62.73603820800781, + "learning_rate": 4.642857142857144e-06, + "loss": 1.6479, + "step": 307 + }, + { + "epoch": 0.776796973518285, + "grad_norm": 66.09080505371094, + "learning_rate": 4.591836734693878e-06, + "loss": 1.5911, + "step": 308 + }, + { + "epoch": 0.7793190416141236, + "grad_norm": 46.583953857421875, + "learning_rate": 4.540816326530613e-06, + "loss": 1.6626, + "step": 309 + }, + { + "epoch": 0.7818411097099621, + "grad_norm": 82.06871795654297, + "learning_rate": 4.489795918367348e-06, + "loss": 1.6232, + "step": 310 + }, + { + "epoch": 0.7843631778058008, + "grad_norm": 44.730926513671875, + "learning_rate": 4.438775510204082e-06, + "loss": 1.4801, + "step": 311 + }, + { + "epoch": 0.7868852459016393, + "grad_norm": 43.200496673583984, + "learning_rate": 4.3877551020408165e-06, + "loss": 1.5245, + "step": 312 + }, + { + "epoch": 0.7894073139974779, + "grad_norm": 51.78078842163086, + "learning_rate": 4.336734693877551e-06, + "loss": 1.5752, + "step": 313 + }, + { + "epoch": 0.7919293820933165, + "grad_norm": 70.77361297607422, + "learning_rate": 4.2857142857142855e-06, + "loss": 1.6275, + "step": 314 + }, + { + "epoch": 0.7944514501891551, + "grad_norm": 40.50032043457031, + "learning_rate": 4.234693877551021e-06, + "loss": 1.6755, + "step": 315 + }, + { + "epoch": 0.7969735182849937, + "grad_norm": 60.885643005371094, + "learning_rate": 4.183673469387755e-06, + "loss": 1.6408, + "step": 316 + }, + { + "epoch": 0.7994955863808323, + "grad_norm": 61.231101989746094, + "learning_rate": 4.13265306122449e-06, + "loss": 1.6985, + "step": 317 + }, + { + "epoch": 0.8020176544766708, + "grad_norm": 49.62914276123047, + "learning_rate": 4.081632653061225e-06, + "loss": 1.5764, + "step": 318 + }, + { + "epoch": 0.8045397225725095, + "grad_norm": 61.66952896118164, + "learning_rate": 4.03061224489796e-06, + "loss": 1.6033, + "step": 319 + }, + { + "epoch": 0.807061790668348, + "grad_norm": 44.09907150268555, + "learning_rate": 3.979591836734694e-06, + "loss": 1.6194, + "step": 320 + }, + { + "epoch": 0.8095838587641866, + "grad_norm": 34.39897155761719, + "learning_rate": 3.928571428571429e-06, + "loss": 1.6418, + "step": 321 + }, + { + "epoch": 0.8121059268600253, + "grad_norm": 55.254425048828125, + "learning_rate": 3.877551020408164e-06, + "loss": 1.6815, + "step": 322 + }, + { + "epoch": 0.8146279949558638, + "grad_norm": 50.335357666015625, + "learning_rate": 3.826530612244898e-06, + "loss": 1.7137, + "step": 323 + }, + { + "epoch": 0.8171500630517023, + "grad_norm": 63.603485107421875, + "learning_rate": 3.7755102040816327e-06, + "loss": 1.6325, + "step": 324 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 63.02777862548828, + "learning_rate": 3.724489795918368e-06, + "loss": 1.6566, + "step": 325 + }, + { + "epoch": 0.8221941992433796, + "grad_norm": 76.89656829833984, + "learning_rate": 3.6734693877551024e-06, + "loss": 1.6102, + "step": 326 + }, + { + "epoch": 0.8247162673392182, + "grad_norm": 57.54744338989258, + "learning_rate": 3.6224489795918373e-06, + "loss": 1.6604, + "step": 327 + }, + { + "epoch": 0.8272383354350568, + "grad_norm": 59.62877655029297, + "learning_rate": 3.5714285714285718e-06, + "loss": 1.5599, + "step": 328 + }, + { + "epoch": 0.8297604035308953, + "grad_norm": 37.507080078125, + "learning_rate": 3.5204081632653062e-06, + "loss": 1.7255, + "step": 329 + }, + { + "epoch": 0.832282471626734, + "grad_norm": 49.42286682128906, + "learning_rate": 3.469387755102041e-06, + "loss": 1.6631, + "step": 330 + }, + { + "epoch": 0.8348045397225725, + "grad_norm": 65.57738494873047, + "learning_rate": 3.4183673469387756e-06, + "loss": 1.8506, + "step": 331 + }, + { + "epoch": 0.8373266078184111, + "grad_norm": 60.7876091003418, + "learning_rate": 3.3673469387755105e-06, + "loss": 1.6044, + "step": 332 + }, + { + "epoch": 0.8398486759142497, + "grad_norm": 58.215980529785156, + "learning_rate": 3.316326530612245e-06, + "loss": 1.6344, + "step": 333 + }, + { + "epoch": 0.8423707440100883, + "grad_norm": 61.29468536376953, + "learning_rate": 3.2653061224489794e-06, + "loss": 1.8094, + "step": 334 + }, + { + "epoch": 0.8448928121059268, + "grad_norm": 52.7525749206543, + "learning_rate": 3.2142857142857147e-06, + "loss": 1.5978, + "step": 335 + }, + { + "epoch": 0.8474148802017655, + "grad_norm": 65.65593719482422, + "learning_rate": 3.1632653061224496e-06, + "loss": 1.6343, + "step": 336 + }, + { + "epoch": 0.849936948297604, + "grad_norm": 65.74759674072266, + "learning_rate": 3.112244897959184e-06, + "loss": 1.7476, + "step": 337 + }, + { + "epoch": 0.8524590163934426, + "grad_norm": 47.83057403564453, + "learning_rate": 3.0612244897959185e-06, + "loss": 1.6581, + "step": 338 + }, + { + "epoch": 0.8549810844892812, + "grad_norm": 59.682037353515625, + "learning_rate": 3.0102040816326534e-06, + "loss": 1.5384, + "step": 339 + }, + { + "epoch": 0.8575031525851198, + "grad_norm": 52.117523193359375, + "learning_rate": 2.959183673469388e-06, + "loss": 1.6949, + "step": 340 + }, + { + "epoch": 0.8600252206809584, + "grad_norm": 61.13319778442383, + "learning_rate": 2.908163265306123e-06, + "loss": 1.6705, + "step": 341 + }, + { + "epoch": 0.862547288776797, + "grad_norm": 54.28220748901367, + "learning_rate": 2.8571428571428573e-06, + "loss": 1.5708, + "step": 342 + }, + { + "epoch": 0.8650693568726355, + "grad_norm": 75.28093719482422, + "learning_rate": 2.8061224489795917e-06, + "loss": 1.5957, + "step": 343 + }, + { + "epoch": 0.8675914249684742, + "grad_norm": 61.688819885253906, + "learning_rate": 2.7551020408163266e-06, + "loss": 1.5535, + "step": 344 + }, + { + "epoch": 0.8701134930643127, + "grad_norm": 69.136962890625, + "learning_rate": 2.7040816326530615e-06, + "loss": 1.7311, + "step": 345 + }, + { + "epoch": 0.8726355611601513, + "grad_norm": 56.65056228637695, + "learning_rate": 2.6530612244897964e-06, + "loss": 1.637, + "step": 346 + }, + { + "epoch": 0.8751576292559899, + "grad_norm": 50.866050720214844, + "learning_rate": 2.602040816326531e-06, + "loss": 1.679, + "step": 347 + }, + { + "epoch": 0.8776796973518285, + "grad_norm": 63.647003173828125, + "learning_rate": 2.5510204081632657e-06, + "loss": 1.5854, + "step": 348 + }, + { + "epoch": 0.880201765447667, + "grad_norm": 51.19943618774414, + "learning_rate": 2.5e-06, + "loss": 1.5621, + "step": 349 + }, + { + "epoch": 0.8827238335435057, + "grad_norm": 35.77592468261719, + "learning_rate": 2.4489795918367347e-06, + "loss": 1.6725, + "step": 350 + }, + { + "epoch": 0.8852459016393442, + "grad_norm": 70.21007537841797, + "learning_rate": 2.3979591836734696e-06, + "loss": 1.6404, + "step": 351 + }, + { + "epoch": 0.8877679697351829, + "grad_norm": 50.95806121826172, + "learning_rate": 2.3469387755102044e-06, + "loss": 1.6173, + "step": 352 + }, + { + "epoch": 0.8902900378310215, + "grad_norm": 53.76484298706055, + "learning_rate": 2.295918367346939e-06, + "loss": 1.6009, + "step": 353 + }, + { + "epoch": 0.89281210592686, + "grad_norm": 55.03848648071289, + "learning_rate": 2.244897959183674e-06, + "loss": 1.7589, + "step": 354 + }, + { + "epoch": 0.8953341740226987, + "grad_norm": 50.98552322387695, + "learning_rate": 2.1938775510204083e-06, + "loss": 1.7033, + "step": 355 + }, + { + "epoch": 0.8978562421185372, + "grad_norm": 55.68265914916992, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.7263, + "step": 356 + }, + { + "epoch": 0.9003783102143758, + "grad_norm": 43.9735221862793, + "learning_rate": 2.0918367346938776e-06, + "loss": 1.6582, + "step": 357 + }, + { + "epoch": 0.9029003783102144, + "grad_norm": 60.161624908447266, + "learning_rate": 2.0408163265306125e-06, + "loss": 1.6799, + "step": 358 + }, + { + "epoch": 0.905422446406053, + "grad_norm": 40.86426544189453, + "learning_rate": 1.989795918367347e-06, + "loss": 1.7094, + "step": 359 + }, + { + "epoch": 0.9079445145018915, + "grad_norm": 50.64308166503906, + "learning_rate": 1.938775510204082e-06, + "loss": 1.5415, + "step": 360 + }, + { + "epoch": 0.9104665825977302, + "grad_norm": 55.8780517578125, + "learning_rate": 1.8877551020408163e-06, + "loss": 1.6845, + "step": 361 + }, + { + "epoch": 0.9129886506935687, + "grad_norm": 61.939876556396484, + "learning_rate": 1.8367346938775512e-06, + "loss": 1.7934, + "step": 362 + }, + { + "epoch": 0.9155107187894073, + "grad_norm": 59.91012191772461, + "learning_rate": 1.7857142857142859e-06, + "loss": 1.5695, + "step": 363 + }, + { + "epoch": 0.9180327868852459, + "grad_norm": 55.318817138671875, + "learning_rate": 1.7346938775510206e-06, + "loss": 1.62, + "step": 364 + }, + { + "epoch": 0.9205548549810845, + "grad_norm": 58.615821838378906, + "learning_rate": 1.6836734693877552e-06, + "loss": 1.6821, + "step": 365 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 50.651973724365234, + "learning_rate": 1.6326530612244897e-06, + "loss": 1.6564, + "step": 366 + }, + { + "epoch": 0.9255989911727617, + "grad_norm": 65.09803009033203, + "learning_rate": 1.5816326530612248e-06, + "loss": 1.5436, + "step": 367 + }, + { + "epoch": 0.9281210592686002, + "grad_norm": 65.5836181640625, + "learning_rate": 1.5306122448979593e-06, + "loss": 1.7057, + "step": 368 + }, + { + "epoch": 0.9306431273644389, + "grad_norm": 44.93059539794922, + "learning_rate": 1.479591836734694e-06, + "loss": 1.5976, + "step": 369 + }, + { + "epoch": 0.9331651954602774, + "grad_norm": 48.956687927246094, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.5902, + "step": 370 + }, + { + "epoch": 0.935687263556116, + "grad_norm": 53.956939697265625, + "learning_rate": 1.3775510204081633e-06, + "loss": 1.7054, + "step": 371 + }, + { + "epoch": 0.9382093316519546, + "grad_norm": 43.42762756347656, + "learning_rate": 1.3265306122448982e-06, + "loss": 1.6939, + "step": 372 + }, + { + "epoch": 0.9407313997477932, + "grad_norm": 64.85823822021484, + "learning_rate": 1.2755102040816329e-06, + "loss": 1.5837, + "step": 373 + }, + { + "epoch": 0.9432534678436317, + "grad_norm": 46.02948760986328, + "learning_rate": 1.2244897959183673e-06, + "loss": 1.6697, + "step": 374 + }, + { + "epoch": 0.9457755359394704, + "grad_norm": 51.031593322753906, + "learning_rate": 1.1734693877551022e-06, + "loss": 1.7086, + "step": 375 + }, + { + "epoch": 0.9482976040353089, + "grad_norm": 58.08714294433594, + "learning_rate": 1.122448979591837e-06, + "loss": 1.5748, + "step": 376 + }, + { + "epoch": 0.9508196721311475, + "grad_norm": 58.478763580322266, + "learning_rate": 1.0714285714285714e-06, + "loss": 1.8731, + "step": 377 + }, + { + "epoch": 0.9533417402269861, + "grad_norm": 50.9328498840332, + "learning_rate": 1.0204081632653063e-06, + "loss": 1.4714, + "step": 378 + }, + { + "epoch": 0.9558638083228247, + "grad_norm": 63.404449462890625, + "learning_rate": 9.69387755102041e-07, + "loss": 1.6991, + "step": 379 + }, + { + "epoch": 0.9583858764186634, + "grad_norm": 95.09234619140625, + "learning_rate": 9.183673469387756e-07, + "loss": 1.6495, + "step": 380 + }, + { + "epoch": 0.9609079445145019, + "grad_norm": 57.21084213256836, + "learning_rate": 8.673469387755103e-07, + "loss": 1.6225, + "step": 381 + }, + { + "epoch": 0.9634300126103404, + "grad_norm": 47.711647033691406, + "learning_rate": 8.163265306122449e-07, + "loss": 1.56, + "step": 382 + }, + { + "epoch": 0.9659520807061791, + "grad_norm": 42.815582275390625, + "learning_rate": 7.653061224489796e-07, + "loss": 1.8021, + "step": 383 + }, + { + "epoch": 0.9684741488020177, + "grad_norm": 50.893619537353516, + "learning_rate": 7.142857142857143e-07, + "loss": 1.5707, + "step": 384 + }, + { + "epoch": 0.9709962168978562, + "grad_norm": 68.56468963623047, + "learning_rate": 6.632653061224491e-07, + "loss": 1.6402, + "step": 385 + }, + { + "epoch": 0.9735182849936949, + "grad_norm": 71.2961654663086, + "learning_rate": 6.122448979591837e-07, + "loss": 1.7148, + "step": 386 + }, + { + "epoch": 0.9760403530895334, + "grad_norm": 64.18102264404297, + "learning_rate": 5.612244897959184e-07, + "loss": 1.5629, + "step": 387 + }, + { + "epoch": 0.978562421185372, + "grad_norm": 58.4742431640625, + "learning_rate": 5.102040816326531e-07, + "loss": 1.5886, + "step": 388 + }, + { + "epoch": 0.9810844892812106, + "grad_norm": 45.84322738647461, + "learning_rate": 4.591836734693878e-07, + "loss": 1.6636, + "step": 389 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 57.07891845703125, + "learning_rate": 4.0816326530612243e-07, + "loss": 1.6842, + "step": 390 + }, + { + "epoch": 0.9861286254728878, + "grad_norm": 45.1109619140625, + "learning_rate": 3.5714285714285716e-07, + "loss": 1.5951, + "step": 391 + }, + { + "epoch": 0.9886506935687264, + "grad_norm": 54.710269927978516, + "learning_rate": 3.0612244897959183e-07, + "loss": 1.5872, + "step": 392 + }, + { + "epoch": 0.9911727616645649, + "grad_norm": 46.05344772338867, + "learning_rate": 2.5510204081632656e-07, + "loss": 1.6574, + "step": 393 + }, + { + "epoch": 0.9936948297604036, + "grad_norm": 61.935646057128906, + "learning_rate": 2.0408163265306121e-07, + "loss": 1.6483, + "step": 394 + }, + { + "epoch": 0.9962168978562421, + "grad_norm": 78.09939575195312, + "learning_rate": 1.5306122448979592e-07, + "loss": 1.6685, + "step": 395 + }, + { + "epoch": 0.9987389659520807, + "grad_norm": 46.65782165527344, + "learning_rate": 1.0204081632653061e-07, + "loss": 1.584, + "step": 396 + }, + { + "epoch": 1.0, + "grad_norm": 60.066410064697266, + "learning_rate": 5.1020408163265303e-08, + "loss": 1.5958, + "step": 397 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.469941110765158e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-397/training_args.bin b/checkpoints/checkpoint-397/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-397/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-40/README.md b/checkpoints/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-40/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-40/adapter_config.json b/checkpoints/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-40/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-40/adapter_model.safetensors b/checkpoints/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..826882a5184baab6f23d370d92182e7b586c9e41 --- /dev/null +++ b/checkpoints/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a714a2e4a10bbe03ef7289447ff090f3162db74a4bb1959e0a422e7cb7477c +size 74016 diff --git a/checkpoints/checkpoint-40/chat_template.jinja b/checkpoints/checkpoint-40/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-40/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-40/optimizer.pt b/checkpoints/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed987363b584f19ab7fb3c8b2312ded810b6a323 --- /dev/null +++ b/checkpoints/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22aefc556e828ac1a6b5e3190663f060e2bec63fba40a7164a2dfced527ccd08 +size 43813 diff --git a/checkpoints/checkpoint-40/rng_state.pth b/checkpoints/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-40/scheduler.pt b/checkpoints/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b1f64a206ecdaf12302f8c61fbdfcdf3d401770 --- /dev/null +++ b/checkpoints/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7320820b24f6e9e145917287113edf73d343aa5dc43801b0de920e78fe1d07 +size 1465 diff --git a/checkpoints/checkpoint-40/special_tokens_map.json b/checkpoints/checkpoint-40/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-40/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-40/tokenizer.json b/checkpoints/checkpoint-40/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-40/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-40/tokenizer_config.json b/checkpoints/checkpoint-40/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-40/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-40/trainer_state.json b/checkpoints/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3aa246e2695e46a6f949af08e21c0f9d7148a470 --- /dev/null +++ b/checkpoints/checkpoint-40/trainer_state.json @@ -0,0 +1,314 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1008827238335435, + "eval_steps": 100, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4462709347590144.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-40/training_args.bin b/checkpoints/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-45/README.md b/checkpoints/checkpoint-45/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-45/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-45/adapter_config.json b/checkpoints/checkpoint-45/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-45/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-45/adapter_model.safetensors b/checkpoints/checkpoint-45/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58ea1ced8f6f333ec51018aa7822dd2fd4e23367 --- /dev/null +++ b/checkpoints/checkpoint-45/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d92f2cbfe072fa7d6c3f50402554da094cb02e0de9a5acb645c7d3bdb77a1241 +size 74016 diff --git a/checkpoints/checkpoint-45/chat_template.jinja b/checkpoints/checkpoint-45/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-45/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-45/optimizer.pt b/checkpoints/checkpoint-45/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9812898930cd6829e9f35f7240b76fbe87a0b8e6 --- /dev/null +++ b/checkpoints/checkpoint-45/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8617e994ba26b79378ce4cc8add3afb5369050a59b3e200489df7a8eceb7514c +size 43813 diff --git a/checkpoints/checkpoint-45/rng_state.pth b/checkpoints/checkpoint-45/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-45/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-45/scheduler.pt b/checkpoints/checkpoint-45/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ce8877debf0f01a971cf2d6cdcb4e9f07ca2d94 --- /dev/null +++ b/checkpoints/checkpoint-45/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eeac0b18221a321507160f72b2da507a7bd76f7eb431ea88e3581ef0d362d97 +size 1465 diff --git a/checkpoints/checkpoint-45/special_tokens_map.json b/checkpoints/checkpoint-45/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-45/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-45/tokenizer.json b/checkpoints/checkpoint-45/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-45/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-45/tokenizer_config.json b/checkpoints/checkpoint-45/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-45/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-45/trainer_state.json b/checkpoints/checkpoint-45/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ed65a636f500d48830dd2b985a959b902a1ef9a --- /dev/null +++ b/checkpoints/checkpoint-45/trainer_state.json @@ -0,0 +1,349 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.11349306431273644, + "eval_steps": 100, + "global_step": 45, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5024139131314176.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-45/training_args.bin b/checkpoints/checkpoint-45/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-45/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-5/README.md b/checkpoints/checkpoint-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-5/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-5/adapter_config.json b/checkpoints/checkpoint-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-5/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-5/adapter_model.safetensors b/checkpoints/checkpoint-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..026fa81dc407a9e162be3ab1a724b64e8a97d425 --- /dev/null +++ b/checkpoints/checkpoint-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fd19daad35e27ec10e7408336edaf3c8d756bb7d849ee9b50e6bb2d45559c3 +size 74016 diff --git a/checkpoints/checkpoint-5/chat_template.jinja b/checkpoints/checkpoint-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-5/optimizer.pt b/checkpoints/checkpoint-5/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4290c646aab3b41d7e25466ff4722430810a1516 --- /dev/null +++ b/checkpoints/checkpoint-5/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecb6f878c8f63271394802d6428b781191acea1d2cf9da444f2aaa9f2806404 +size 43813 diff --git a/checkpoints/checkpoint-5/rng_state.pth b/checkpoints/checkpoint-5/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-5/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-5/scheduler.pt b/checkpoints/checkpoint-5/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e5fcb0de2475471a08a088ce0b0718626be7bda --- /dev/null +++ b/checkpoints/checkpoint-5/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db050dbe914db734bc96fdb629c74c18c9b2fab2ad07b1d70a98d0f27d570dd +size 1465 diff --git a/checkpoints/checkpoint-5/special_tokens_map.json b/checkpoints/checkpoint-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-5/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-5/tokenizer.json b/checkpoints/checkpoint-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-5/tokenizer_config.json b/checkpoints/checkpoint-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-5/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-5/trainer_state.json b/checkpoints/checkpoint-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8b97092991e0b18c40b0c26067dae610dbe033e8 --- /dev/null +++ b/checkpoints/checkpoint-5/trainer_state.json @@ -0,0 +1,69 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.012610340479192938, + "eval_steps": 100, + "global_step": 5, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 558457826254848.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-5/training_args.bin b/checkpoints/checkpoint-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-50/README.md b/checkpoints/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-50/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-50/adapter_config.json b/checkpoints/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-50/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-50/adapter_model.safetensors b/checkpoints/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a62d8469983ec7e055d2e7eb2ba26ba0b9220e3 --- /dev/null +++ b/checkpoints/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac6db47a553857a603b08e3ccaa82869c3d1f7fd983ff6c4e2112bee1081a7e +size 74016 diff --git a/checkpoints/checkpoint-50/chat_template.jinja b/checkpoints/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-50/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-50/optimizer.pt b/checkpoints/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..66cc4609c49ba70515abcd154426d40ec41a5228 --- /dev/null +++ b/checkpoints/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831cdf7694746f3f4070f94d68b3a015f59160124ad5b49fe6223f8eb57181b6 +size 43813 diff --git a/checkpoints/checkpoint-50/rng_state.pth b/checkpoints/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-50/scheduler.pt b/checkpoints/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a7d6934a47813cf67df98390376c6e7782e2629 --- /dev/null +++ b/checkpoints/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbe5d688e2941da9b24366bdc398946537cb9fbf130677019ec7d7c46b6d9c +size 1465 diff --git a/checkpoints/checkpoint-50/special_tokens_map.json b/checkpoints/checkpoint-50/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-50/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-50/tokenizer.json b/checkpoints/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-50/tokenizer_config.json b/checkpoints/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-50/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-50/trainer_state.json b/checkpoints/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be1e9fa0231f4a297cdae6a639a45c27b77da291 --- /dev/null +++ b/checkpoints/checkpoint-50/trainer_state.json @@ -0,0 +1,384 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.12610340479192939, + "eval_steps": 100, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5587099923431424.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-50/training_args.bin b/checkpoints/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-55/README.md b/checkpoints/checkpoint-55/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-55/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-55/adapter_config.json b/checkpoints/checkpoint-55/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-55/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-55/adapter_model.safetensors b/checkpoints/checkpoint-55/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c32b07868f7bd0056b8b7db6e7e9f219b491548a --- /dev/null +++ b/checkpoints/checkpoint-55/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bbe852ba685d13cc452d7f7751c245890e9faac6e8de2d68e1715ba6eb43d74 +size 74016 diff --git a/checkpoints/checkpoint-55/chat_template.jinja b/checkpoints/checkpoint-55/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-55/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-55/optimizer.pt b/checkpoints/checkpoint-55/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2fb9b6d8397aaa0cf8359f1892b79ef9db316b6 --- /dev/null +++ b/checkpoints/checkpoint-55/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d8b4d2afec570fd4cf502f0e1b0647cc2d097180da626de1d216b735927a28 +size 43813 diff --git a/checkpoints/checkpoint-55/rng_state.pth b/checkpoints/checkpoint-55/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-55/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-55/scheduler.pt b/checkpoints/checkpoint-55/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0207787ae0cad36845974388986cc7da7317724 --- /dev/null +++ b/checkpoints/checkpoint-55/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4790decd1d378b55b22b106f58c39ae6f8dbb2abbcbf1051b3a382804e6ba7b +size 1465 diff --git a/checkpoints/checkpoint-55/special_tokens_map.json b/checkpoints/checkpoint-55/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-55/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-55/tokenizer.json b/checkpoints/checkpoint-55/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-55/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-55/tokenizer_config.json b/checkpoints/checkpoint-55/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-55/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-55/trainer_state.json b/checkpoints/checkpoint-55/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..786dc589b5d571d90906282147b62a82bf753459 --- /dev/null +++ b/checkpoints/checkpoint-55/trainer_state.json @@ -0,0 +1,419 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.13871374527112232, + "eval_steps": 100, + "global_step": 55, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6154023325507584.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-55/training_args.bin b/checkpoints/checkpoint-55/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-55/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-60/README.md b/checkpoints/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-60/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-60/adapter_config.json b/checkpoints/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-60/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-60/adapter_model.safetensors b/checkpoints/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..490553e2e9e1f37e068257934760adb9b75fff93 --- /dev/null +++ b/checkpoints/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba972edf31fb2b3c312e1fbd076df35c92eb9e1a9d3ce7a8ef84cc5a9f7b01aa +size 74016 diff --git a/checkpoints/checkpoint-60/chat_template.jinja b/checkpoints/checkpoint-60/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-60/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-60/optimizer.pt b/checkpoints/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b54e5d7b87660b06f247f4785f92d235428068c4 --- /dev/null +++ b/checkpoints/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9cf3eeb514d6ff9a058dda0ca3c20eedeaa0accea82cbefd68c776711fd34e +size 43813 diff --git a/checkpoints/checkpoint-60/rng_state.pth b/checkpoints/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-60/scheduler.pt b/checkpoints/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..923f00fac4bdd50e26ccfc6475f5b4094c9fefb4 --- /dev/null +++ b/checkpoints/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5f48399c27701bd2074faaab5e8762b9db88f1ccf0640167a823b2052d7dba +size 1465 diff --git a/checkpoints/checkpoint-60/special_tokens_map.json b/checkpoints/checkpoint-60/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-60/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-60/tokenizer.json b/checkpoints/checkpoint-60/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-60/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-60/tokenizer_config.json b/checkpoints/checkpoint-60/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-60/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-60/trainer_state.json b/checkpoints/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3801a8e31c35acfd8ffcd50da56812691b1e08b8 --- /dev/null +++ b/checkpoints/checkpoint-60/trainer_state.json @@ -0,0 +1,454 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15132408575031525, + "eval_steps": 100, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6720316312363008.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-60/training_args.bin b/checkpoints/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-65/README.md b/checkpoints/checkpoint-65/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-65/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-65/adapter_config.json b/checkpoints/checkpoint-65/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-65/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-65/adapter_model.safetensors b/checkpoints/checkpoint-65/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67851e694d4f3152141c2fd2c6348c29cbf2c970 --- /dev/null +++ b/checkpoints/checkpoint-65/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a54ac304615e732079f57f3937ca9e60a93d2b1b8e2680e3e9041bec69e544 +size 74016 diff --git a/checkpoints/checkpoint-65/chat_template.jinja b/checkpoints/checkpoint-65/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-65/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-65/optimizer.pt b/checkpoints/checkpoint-65/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..34d8cb8fbd7423016495a6be95c89c32034c6288 --- /dev/null +++ b/checkpoints/checkpoint-65/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9117a722360db78ca305fa5d37571eb539b3f86f0f640f09e173e3ef677823 +size 43813 diff --git a/checkpoints/checkpoint-65/rng_state.pth b/checkpoints/checkpoint-65/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-65/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-65/scheduler.pt b/checkpoints/checkpoint-65/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9df5556dbbb4474aa88d6a4b84fcd4ddde5ed4a --- /dev/null +++ b/checkpoints/checkpoint-65/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa293470273dc516de62895af8e9dfa2f5ce9b6ab20b1ee8f57d24d1394bf59 +size 1465 diff --git a/checkpoints/checkpoint-65/special_tokens_map.json b/checkpoints/checkpoint-65/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-65/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-65/tokenizer.json b/checkpoints/checkpoint-65/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-65/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-65/tokenizer_config.json b/checkpoints/checkpoint-65/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-65/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-65/trainer_state.json b/checkpoints/checkpoint-65/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1c5daab69f2ea13d1ba71acaa4bce359bcba866 --- /dev/null +++ b/checkpoints/checkpoint-65/trainer_state.json @@ -0,0 +1,489 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.16393442622950818, + "eval_steps": 100, + "global_step": 65, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7265355300347904.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-65/training_args.bin b/checkpoints/checkpoint-65/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-65/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-70/README.md b/checkpoints/checkpoint-70/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-70/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-70/adapter_config.json b/checkpoints/checkpoint-70/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-70/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-70/adapter_model.safetensors b/checkpoints/checkpoint-70/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71e756b5af487aff7c578088f78b345fac492530 --- /dev/null +++ b/checkpoints/checkpoint-70/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6c7c59a94445fe53b95b615f2a97232670739c9920b13adcfec9356f7f7c84 +size 74016 diff --git a/checkpoints/checkpoint-70/chat_template.jinja b/checkpoints/checkpoint-70/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-70/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-70/optimizer.pt b/checkpoints/checkpoint-70/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e7c11eb3ca54a08e2a1534a470e4e3f17a3e48e --- /dev/null +++ b/checkpoints/checkpoint-70/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3046060a118342425a18467dc021dcfea53af144cb53938cee3dd0f68db156 +size 43813 diff --git a/checkpoints/checkpoint-70/rng_state.pth b/checkpoints/checkpoint-70/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-70/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-70/scheduler.pt b/checkpoints/checkpoint-70/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fdf10f317e599a4f76181a03963bc88e092c469 --- /dev/null +++ b/checkpoints/checkpoint-70/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f05905443502cc2771a1e39c2f819c6f57bbb38b36e5ecbcb7476426c46ea3 +size 1465 diff --git a/checkpoints/checkpoint-70/special_tokens_map.json b/checkpoints/checkpoint-70/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-70/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-70/tokenizer.json b/checkpoints/checkpoint-70/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-70/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-70/tokenizer_config.json b/checkpoints/checkpoint-70/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-70/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-70/trainer_state.json b/checkpoints/checkpoint-70/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..326a31e596649d7f05f8708bd9c898c2f386fb93 --- /dev/null +++ b/checkpoints/checkpoint-70/trainer_state.json @@ -0,0 +1,524 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.17654476670870115, + "eval_steps": 100, + "global_step": 70, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7823362830016512.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-70/training_args.bin b/checkpoints/checkpoint-70/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-70/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-75/README.md b/checkpoints/checkpoint-75/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-75/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-75/adapter_config.json b/checkpoints/checkpoint-75/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-75/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-75/adapter_model.safetensors b/checkpoints/checkpoint-75/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e14c6481a9a29ec4e04e891aa14cf9b9cf95635 --- /dev/null +++ b/checkpoints/checkpoint-75/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2177d8383a5d71f9a472cc2f0267a1e4b1f68e454f220412d1c92d035463eb +size 74016 diff --git a/checkpoints/checkpoint-75/chat_template.jinja b/checkpoints/checkpoint-75/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-75/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-75/optimizer.pt b/checkpoints/checkpoint-75/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..53e10b868c36db0d163c9ce0762b939c87113eab --- /dev/null +++ b/checkpoints/checkpoint-75/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0997c7ced53f10b0dcac13ec7f39a6cfc45bd573cd7d51191af134c10e76735d +size 43813 diff --git a/checkpoints/checkpoint-75/rng_state.pth b/checkpoints/checkpoint-75/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-75/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-75/scheduler.pt b/checkpoints/checkpoint-75/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85668aa2f8d38ca05b07c31cfd430ac38c41f661 --- /dev/null +++ b/checkpoints/checkpoint-75/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5014f1510acf8ba2b2f011be66042ec50f9672848379685db54fcbd9443eca03 +size 1465 diff --git a/checkpoints/checkpoint-75/special_tokens_map.json b/checkpoints/checkpoint-75/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-75/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-75/tokenizer.json b/checkpoints/checkpoint-75/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-75/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-75/tokenizer_config.json b/checkpoints/checkpoint-75/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-75/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-75/trainer_state.json b/checkpoints/checkpoint-75/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5e44b5898072f5302c7411a8afd4d99e15a8805a --- /dev/null +++ b/checkpoints/checkpoint-75/trainer_state.json @@ -0,0 +1,559 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.18915510718789408, + "eval_steps": 100, + "global_step": 75, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8387314274623488.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-75/training_args.bin b/checkpoints/checkpoint-75/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-75/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-80/README.md b/checkpoints/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-80/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-80/adapter_config.json b/checkpoints/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-80/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-80/adapter_model.safetensors b/checkpoints/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b330bd098804259ac91cbb98259766b5ca775208 --- /dev/null +++ b/checkpoints/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3484edbb13ea2d2d3d512d8bf7bf9765e641a55bc464b173cc2f35ce8adab46 +size 74016 diff --git a/checkpoints/checkpoint-80/chat_template.jinja b/checkpoints/checkpoint-80/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-80/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-80/optimizer.pt b/checkpoints/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a02e0b6c9ac870b8a97c3b0ccd836e339e5d4068 --- /dev/null +++ b/checkpoints/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb62331683e716d0a198d3ac751b3d296a3d0273a5c8a7d675fe384def599195 +size 43813 diff --git a/checkpoints/checkpoint-80/rng_state.pth b/checkpoints/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-80/scheduler.pt b/checkpoints/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..37aeeb88378d00d5883234290b278e0e9279f98d --- /dev/null +++ b/checkpoints/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f2095f9149a6919117af9a2ec29bf7d1ddaff4c98f8f7422a5360c32c5e70c +size 1465 diff --git a/checkpoints/checkpoint-80/special_tokens_map.json b/checkpoints/checkpoint-80/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-80/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-80/tokenizer.json b/checkpoints/checkpoint-80/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-80/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-80/tokenizer_config.json b/checkpoints/checkpoint-80/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-80/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-80/trainer_state.json b/checkpoints/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d7a2c68664636c20fb6389516191c6ff7f3e1b37 --- /dev/null +++ b/checkpoints/checkpoint-80/trainer_state.json @@ -0,0 +1,594 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.201765447667087, + "eval_steps": 100, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8947933524492288.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-80/training_args.bin b/checkpoints/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-85/README.md b/checkpoints/checkpoint-85/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-85/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-85/adapter_config.json b/checkpoints/checkpoint-85/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-85/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-85/adapter_model.safetensors b/checkpoints/checkpoint-85/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..326f54b870a0fc196281ab478b7a1d6e5739387b --- /dev/null +++ b/checkpoints/checkpoint-85/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63cb2baf05c5b1c61d85772db08704967ce88e875476783365039577628d699 +size 74016 diff --git a/checkpoints/checkpoint-85/chat_template.jinja b/checkpoints/checkpoint-85/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-85/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-85/optimizer.pt b/checkpoints/checkpoint-85/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..83ac85aa193a438412d8fd5e38bc81bcaa64366f --- /dev/null +++ b/checkpoints/checkpoint-85/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a464c4f4dac3f56aa0aaf9f42016cff9ec92f161c08d757e88125153c469a866 +size 43813 diff --git a/checkpoints/checkpoint-85/rng_state.pth b/checkpoints/checkpoint-85/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-85/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-85/scheduler.pt b/checkpoints/checkpoint-85/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0670877709ba7cae03a6812b7645b53f267a534 --- /dev/null +++ b/checkpoints/checkpoint-85/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2910bc5c8134167d284b9c77a9ddf19209c7d53330f3610a3db226db7acded7b +size 1465 diff --git a/checkpoints/checkpoint-85/special_tokens_map.json b/checkpoints/checkpoint-85/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-85/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-85/tokenizer.json b/checkpoints/checkpoint-85/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-85/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-85/tokenizer_config.json b/checkpoints/checkpoint-85/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-85/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-85/trainer_state.json b/checkpoints/checkpoint-85/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a23b7ecb8780fa2883d56e3c6543c0fe86195829 --- /dev/null +++ b/checkpoints/checkpoint-85/trainer_state.json @@ -0,0 +1,629 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.21437578814627994, + "eval_steps": 100, + "global_step": 85, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9505490757574656.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-85/training_args.bin b/checkpoints/checkpoint-85/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-85/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-90/README.md b/checkpoints/checkpoint-90/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-90/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-90/adapter_config.json b/checkpoints/checkpoint-90/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-90/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-90/adapter_model.safetensors b/checkpoints/checkpoint-90/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17047eeb0a4925c8a2de60cf38da07dafcc340ae --- /dev/null +++ b/checkpoints/checkpoint-90/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190eaca2fad47bb78d3e9ae1a97c16558f2dd85bd6710690bb7e5340650904f1 +size 74016 diff --git a/checkpoints/checkpoint-90/chat_template.jinja b/checkpoints/checkpoint-90/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-90/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-90/optimizer.pt b/checkpoints/checkpoint-90/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..71995a9642ffd0bd5555fc752b4ae12a7b64dc1b --- /dev/null +++ b/checkpoints/checkpoint-90/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88907c19d3ce624035f885291710a27debd3affd54403c6ee15ee2037b081ae8 +size 43813 diff --git a/checkpoints/checkpoint-90/rng_state.pth b/checkpoints/checkpoint-90/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-90/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-90/scheduler.pt b/checkpoints/checkpoint-90/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..377b4f0a8064e5e9532e65a3616c226b97f2044b --- /dev/null +++ b/checkpoints/checkpoint-90/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1d2d2aa2c56b7686bcd5878fbde1912712df0944052de1b730fa940e63bcb4 +size 1465 diff --git a/checkpoints/checkpoint-90/special_tokens_map.json b/checkpoints/checkpoint-90/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-90/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-90/tokenizer.json b/checkpoints/checkpoint-90/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-90/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-90/tokenizer_config.json b/checkpoints/checkpoint-90/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-90/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-90/trainer_state.json b/checkpoints/checkpoint-90/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0fdcd39beb6f0db4b9f0de132b54af97ba651c45 --- /dev/null +++ b/checkpoints/checkpoint-90/trainer_state.json @@ -0,0 +1,664 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.22698612862547288, + "eval_steps": 100, + "global_step": 90, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0067911193788416e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-90/training_args.bin b/checkpoints/checkpoint-90/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-90/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/checkpoints/checkpoint-95/README.md b/checkpoints/checkpoint-95/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f9b71d9a8724b3a615d85f0d20c55543b966d14 --- /dev/null +++ b/checkpoints/checkpoint-95/README.md @@ -0,0 +1,202 @@ +--- +base_model: unsloth/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoints/checkpoint-95/adapter_config.json b/checkpoints/checkpoint-95/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..568aa96e55b44e542f05cb2e2fd1fb708fd13cc3 --- /dev/null +++ b/checkpoints/checkpoint-95/adapter_config.json @@ -0,0 +1,35 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "unsloth/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": [ + 2 + ], + "loftq_config": {}, + "lora_alpha": 512, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 1, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": true +} \ No newline at end of file diff --git a/checkpoints/checkpoint-95/adapter_model.safetensors b/checkpoints/checkpoint-95/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4e5d0f44029a03c0e3ab0955c81c2ab1e295677 --- /dev/null +++ b/checkpoints/checkpoint-95/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d053e9a8fdf560a3962de6d6020066b035d962fa678c6a0a14479e80bb403f94 +size 74016 diff --git a/checkpoints/checkpoint-95/chat_template.jinja b/checkpoints/checkpoint-95/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoints/checkpoint-95/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoints/checkpoint-95/optimizer.pt b/checkpoints/checkpoint-95/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..63685cf5285f561089ccdca4efb40f469275d287 --- /dev/null +++ b/checkpoints/checkpoint-95/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3384d35485905d7e3067aa1d688498775162cee5cf688e123a14f97c0ddd397 +size 43813 diff --git a/checkpoints/checkpoint-95/rng_state.pth b/checkpoints/checkpoint-95/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..449028c1ecc80657ae434a40b29f1f2bdccb9195 --- /dev/null +++ b/checkpoints/checkpoint-95/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e15e837284f30841feeb4cb11a4ca47e6e0a0d43907e64044c865959176390 +size 14581 diff --git a/checkpoints/checkpoint-95/scheduler.pt b/checkpoints/checkpoint-95/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7f819d7065cfe5ed0f938c52b823da0e181fad --- /dev/null +++ b/checkpoints/checkpoint-95/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07de25ec9be65a0b483f6d4e7e6dac1d25b465b1f8cad89c3a641d70e20a7f2 +size 1465 diff --git a/checkpoints/checkpoint-95/special_tokens_map.json b/checkpoints/checkpoint-95/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/checkpoints/checkpoint-95/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoints/checkpoint-95/tokenizer.json b/checkpoints/checkpoint-95/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoints/checkpoint-95/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoints/checkpoint-95/tokenizer_config.json b/checkpoints/checkpoint-95/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/checkpoints/checkpoint-95/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/checkpoints/checkpoint-95/trainer_state.json b/checkpoints/checkpoint-95/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f339af1414230ea496acd625bd8d10bf1bcb38d7 --- /dev/null +++ b/checkpoints/checkpoint-95/trainer_state.json @@ -0,0 +1,699 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.23959646910466584, + "eval_steps": 100, + "global_step": 95, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0025220680958385876, + "grad_norm": 9.573529243469238, + "learning_rate": 0.0, + "loss": 2.4462, + "step": 1 + }, + { + "epoch": 0.005044136191677175, + "grad_norm": 9.36291217803955, + "learning_rate": 4.000000000000001e-06, + "loss": 2.6811, + "step": 2 + }, + { + "epoch": 0.007566204287515763, + "grad_norm": 8.949094772338867, + "learning_rate": 8.000000000000001e-06, + "loss": 2.5078, + "step": 3 + }, + { + "epoch": 0.01008827238335435, + "grad_norm": 8.079751014709473, + "learning_rate": 1.2e-05, + "loss": 2.6826, + "step": 4 + }, + { + "epoch": 0.012610340479192938, + "grad_norm": 9.398191452026367, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.6193, + "step": 5 + }, + { + "epoch": 0.015132408575031526, + "grad_norm": 11.087775230407715, + "learning_rate": 2e-05, + "loss": 2.5306, + "step": 6 + }, + { + "epoch": 0.017654476670870115, + "grad_norm": 13.118534088134766, + "learning_rate": 1.9948979591836737e-05, + "loss": 2.4631, + "step": 7 + }, + { + "epoch": 0.0201765447667087, + "grad_norm": 15.161271095275879, + "learning_rate": 1.9897959183673473e-05, + "loss": 2.5809, + "step": 8 + }, + { + "epoch": 0.02269861286254729, + "grad_norm": 13.814377784729004, + "learning_rate": 1.9846938775510205e-05, + "loss": 2.4512, + "step": 9 + }, + { + "epoch": 0.025220680958385876, + "grad_norm": 18.89747428894043, + "learning_rate": 1.979591836734694e-05, + "loss": 2.3011, + "step": 10 + }, + { + "epoch": 0.027742749054224466, + "grad_norm": 15.47609806060791, + "learning_rate": 1.9744897959183677e-05, + "loss": 2.3821, + "step": 11 + }, + { + "epoch": 0.03026481715006305, + "grad_norm": 15.099431991577148, + "learning_rate": 1.969387755102041e-05, + "loss": 2.3741, + "step": 12 + }, + { + "epoch": 0.03278688524590164, + "grad_norm": 18.993785858154297, + "learning_rate": 1.9642857142857145e-05, + "loss": 2.4979, + "step": 13 + }, + { + "epoch": 0.03530895334174023, + "grad_norm": 16.811134338378906, + "learning_rate": 1.9591836734693877e-05, + "loss": 2.369, + "step": 14 + }, + { + "epoch": 0.03783102143757881, + "grad_norm": 20.262407302856445, + "learning_rate": 1.9540816326530613e-05, + "loss": 2.3913, + "step": 15 + }, + { + "epoch": 0.0403530895334174, + "grad_norm": 19.87333106994629, + "learning_rate": 1.948979591836735e-05, + "loss": 2.5415, + "step": 16 + }, + { + "epoch": 0.04287515762925599, + "grad_norm": 18.393142700195312, + "learning_rate": 1.9438775510204085e-05, + "loss": 2.2834, + "step": 17 + }, + { + "epoch": 0.04539722572509458, + "grad_norm": 19.849857330322266, + "learning_rate": 1.9387755102040817e-05, + "loss": 2.319, + "step": 18 + }, + { + "epoch": 0.04791929382093316, + "grad_norm": 17.821640014648438, + "learning_rate": 1.9336734693877553e-05, + "loss": 2.2776, + "step": 19 + }, + { + "epoch": 0.05044136191677175, + "grad_norm": 16.894283294677734, + "learning_rate": 1.928571428571429e-05, + "loss": 2.2729, + "step": 20 + }, + { + "epoch": 0.05296343001261034, + "grad_norm": 16.883058547973633, + "learning_rate": 1.9234693877551024e-05, + "loss": 2.2581, + "step": 21 + }, + { + "epoch": 0.05548549810844893, + "grad_norm": 16.993877410888672, + "learning_rate": 1.9183673469387756e-05, + "loss": 2.2428, + "step": 22 + }, + { + "epoch": 0.058007566204287514, + "grad_norm": 18.408275604248047, + "learning_rate": 1.9132653061224492e-05, + "loss": 2.374, + "step": 23 + }, + { + "epoch": 0.0605296343001261, + "grad_norm": 15.80359935760498, + "learning_rate": 1.9081632653061225e-05, + "loss": 2.2973, + "step": 24 + }, + { + "epoch": 0.06305170239596469, + "grad_norm": 17.94160270690918, + "learning_rate": 1.903061224489796e-05, + "loss": 2.2567, + "step": 25 + }, + { + "epoch": 0.06557377049180328, + "grad_norm": 17.734100341796875, + "learning_rate": 1.8979591836734696e-05, + "loss": 2.1519, + "step": 26 + }, + { + "epoch": 0.06809583858764187, + "grad_norm": 17.123193740844727, + "learning_rate": 1.892857142857143e-05, + "loss": 2.2786, + "step": 27 + }, + { + "epoch": 0.07061790668348046, + "grad_norm": 18.64126968383789, + "learning_rate": 1.8877551020408164e-05, + "loss": 2.3496, + "step": 28 + }, + { + "epoch": 0.07313997477931904, + "grad_norm": 18.795074462890625, + "learning_rate": 1.88265306122449e-05, + "loss": 2.2782, + "step": 29 + }, + { + "epoch": 0.07566204287515763, + "grad_norm": 20.494304656982422, + "learning_rate": 1.8775510204081636e-05, + "loss": 2.2636, + "step": 30 + }, + { + "epoch": 0.07818411097099622, + "grad_norm": 19.422025680541992, + "learning_rate": 1.8724489795918368e-05, + "loss": 2.2449, + "step": 31 + }, + { + "epoch": 0.0807061790668348, + "grad_norm": 19.913166046142578, + "learning_rate": 1.8673469387755104e-05, + "loss": 2.2863, + "step": 32 + }, + { + "epoch": 0.0832282471626734, + "grad_norm": 20.73911476135254, + "learning_rate": 1.862244897959184e-05, + "loss": 2.2711, + "step": 33 + }, + { + "epoch": 0.08575031525851198, + "grad_norm": 22.91425895690918, + "learning_rate": 1.8571428571428575e-05, + "loss": 2.0419, + "step": 34 + }, + { + "epoch": 0.08827238335435057, + "grad_norm": 24.31956672668457, + "learning_rate": 1.8520408163265307e-05, + "loss": 2.1162, + "step": 35 + }, + { + "epoch": 0.09079445145018916, + "grad_norm": 18.169391632080078, + "learning_rate": 1.8469387755102043e-05, + "loss": 1.9727, + "step": 36 + }, + { + "epoch": 0.09331651954602774, + "grad_norm": 25.95748519897461, + "learning_rate": 1.8418367346938776e-05, + "loss": 2.2558, + "step": 37 + }, + { + "epoch": 0.09583858764186633, + "grad_norm": 18.037721633911133, + "learning_rate": 1.836734693877551e-05, + "loss": 2.0268, + "step": 38 + }, + { + "epoch": 0.09836065573770492, + "grad_norm": 22.27294921875, + "learning_rate": 1.8316326530612247e-05, + "loss": 2.0745, + "step": 39 + }, + { + "epoch": 0.1008827238335435, + "grad_norm": 19.726648330688477, + "learning_rate": 1.826530612244898e-05, + "loss": 2.0145, + "step": 40 + }, + { + "epoch": 0.1034047919293821, + "grad_norm": 22.59362030029297, + "learning_rate": 1.8214285714285715e-05, + "loss": 1.925, + "step": 41 + }, + { + "epoch": 0.10592686002522068, + "grad_norm": 21.95757484436035, + "learning_rate": 1.816326530612245e-05, + "loss": 2.0348, + "step": 42 + }, + { + "epoch": 0.10844892812105927, + "grad_norm": 20.475801467895508, + "learning_rate": 1.8112244897959187e-05, + "loss": 1.9361, + "step": 43 + }, + { + "epoch": 0.11097099621689786, + "grad_norm": 23.88913345336914, + "learning_rate": 1.806122448979592e-05, + "loss": 2.0103, + "step": 44 + }, + { + "epoch": 0.11349306431273644, + "grad_norm": 30.553556442260742, + "learning_rate": 1.8010204081632655e-05, + "loss": 2.0374, + "step": 45 + }, + { + "epoch": 0.11601513240857503, + "grad_norm": 18.443796157836914, + "learning_rate": 1.795918367346939e-05, + "loss": 2.0119, + "step": 46 + }, + { + "epoch": 0.11853720050441362, + "grad_norm": 16.667591094970703, + "learning_rate": 1.7908163265306123e-05, + "loss": 1.9304, + "step": 47 + }, + { + "epoch": 0.1210592686002522, + "grad_norm": 21.38385772705078, + "learning_rate": 1.785714285714286e-05, + "loss": 2.0972, + "step": 48 + }, + { + "epoch": 0.1235813366960908, + "grad_norm": 34.97208023071289, + "learning_rate": 1.780612244897959e-05, + "loss": 1.9028, + "step": 49 + }, + { + "epoch": 0.12610340479192939, + "grad_norm": 27.10704231262207, + "learning_rate": 1.7755102040816327e-05, + "loss": 1.9675, + "step": 50 + }, + { + "epoch": 0.12862547288776796, + "grad_norm": 25.97450828552246, + "learning_rate": 1.7704081632653062e-05, + "loss": 1.9325, + "step": 51 + }, + { + "epoch": 0.13114754098360656, + "grad_norm": 23.316164016723633, + "learning_rate": 1.7653061224489798e-05, + "loss": 1.9334, + "step": 52 + }, + { + "epoch": 0.13366960907944514, + "grad_norm": 20.12580108642578, + "learning_rate": 1.760204081632653e-05, + "loss": 1.9, + "step": 53 + }, + { + "epoch": 0.13619167717528374, + "grad_norm": 19.166135787963867, + "learning_rate": 1.7551020408163266e-05, + "loss": 2.0969, + "step": 54 + }, + { + "epoch": 0.13871374527112232, + "grad_norm": 24.145370483398438, + "learning_rate": 1.7500000000000002e-05, + "loss": 1.9222, + "step": 55 + }, + { + "epoch": 0.14123581336696092, + "grad_norm": 18.181575775146484, + "learning_rate": 1.7448979591836738e-05, + "loss": 1.9108, + "step": 56 + }, + { + "epoch": 0.1437578814627995, + "grad_norm": 19.294092178344727, + "learning_rate": 1.7397959183673473e-05, + "loss": 2.0482, + "step": 57 + }, + { + "epoch": 0.14627994955863807, + "grad_norm": 31.68648910522461, + "learning_rate": 1.7346938775510206e-05, + "loss": 1.8425, + "step": 58 + }, + { + "epoch": 0.14880201765447668, + "grad_norm": 31.456830978393555, + "learning_rate": 1.729591836734694e-05, + "loss": 1.8343, + "step": 59 + }, + { + "epoch": 0.15132408575031525, + "grad_norm": 26.11122703552246, + "learning_rate": 1.7244897959183674e-05, + "loss": 2.0046, + "step": 60 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 26.997562408447266, + "learning_rate": 1.719387755102041e-05, + "loss": 1.886, + "step": 61 + }, + { + "epoch": 0.15636822194199243, + "grad_norm": 27.2921199798584, + "learning_rate": 1.7142857142857142e-05, + "loss": 1.7468, + "step": 62 + }, + { + "epoch": 0.15889029003783103, + "grad_norm": 24.611576080322266, + "learning_rate": 1.7091836734693878e-05, + "loss": 1.9542, + "step": 63 + }, + { + "epoch": 0.1614123581336696, + "grad_norm": 29.580781936645508, + "learning_rate": 1.7040816326530613e-05, + "loss": 1.7859, + "step": 64 + }, + { + "epoch": 0.16393442622950818, + "grad_norm": 26.715187072753906, + "learning_rate": 1.698979591836735e-05, + "loss": 1.7903, + "step": 65 + }, + { + "epoch": 0.1664564943253468, + "grad_norm": 23.047237396240234, + "learning_rate": 1.6938775510204085e-05, + "loss": 1.9575, + "step": 66 + }, + { + "epoch": 0.16897856242118536, + "grad_norm": 22.88684844970703, + "learning_rate": 1.6887755102040817e-05, + "loss": 1.8025, + "step": 67 + }, + { + "epoch": 0.17150063051702397, + "grad_norm": 48.304630279541016, + "learning_rate": 1.6836734693877553e-05, + "loss": 1.803, + "step": 68 + }, + { + "epoch": 0.17402269861286254, + "grad_norm": 24.825862884521484, + "learning_rate": 1.678571428571429e-05, + "loss": 1.8354, + "step": 69 + }, + { + "epoch": 0.17654476670870115, + "grad_norm": 20.993305206298828, + "learning_rate": 1.673469387755102e-05, + "loss": 1.6725, + "step": 70 + }, + { + "epoch": 0.17906683480453972, + "grad_norm": 35.779930114746094, + "learning_rate": 1.6683673469387757e-05, + "loss": 1.8141, + "step": 71 + }, + { + "epoch": 0.18158890290037832, + "grad_norm": 58.69309997558594, + "learning_rate": 1.6632653061224492e-05, + "loss": 1.8722, + "step": 72 + }, + { + "epoch": 0.1841109709962169, + "grad_norm": 45.597023010253906, + "learning_rate": 1.6581632653061225e-05, + "loss": 1.8483, + "step": 73 + }, + { + "epoch": 0.18663303909205547, + "grad_norm": 43.947933197021484, + "learning_rate": 1.653061224489796e-05, + "loss": 1.8725, + "step": 74 + }, + { + "epoch": 0.18915510718789408, + "grad_norm": 47.61981201171875, + "learning_rate": 1.6479591836734696e-05, + "loss": 1.9448, + "step": 75 + }, + { + "epoch": 0.19167717528373265, + "grad_norm": 49.325035095214844, + "learning_rate": 1.642857142857143e-05, + "loss": 1.7181, + "step": 76 + }, + { + "epoch": 0.19419924337957126, + "grad_norm": 64.5460433959961, + "learning_rate": 1.6377551020408164e-05, + "loss": 1.6482, + "step": 77 + }, + { + "epoch": 0.19672131147540983, + "grad_norm": 37.73046875, + "learning_rate": 1.63265306122449e-05, + "loss": 1.7696, + "step": 78 + }, + { + "epoch": 0.19924337957124844, + "grad_norm": 44.14755630493164, + "learning_rate": 1.6275510204081636e-05, + "loss": 1.5095, + "step": 79 + }, + { + "epoch": 0.201765447667087, + "grad_norm": 45.268978118896484, + "learning_rate": 1.6224489795918368e-05, + "loss": 1.8428, + "step": 80 + }, + { + "epoch": 0.2042875157629256, + "grad_norm": 46.6614875793457, + "learning_rate": 1.6173469387755104e-05, + "loss": 1.8, + "step": 81 + }, + { + "epoch": 0.2068095838587642, + "grad_norm": 35.18565368652344, + "learning_rate": 1.612244897959184e-05, + "loss": 1.8676, + "step": 82 + }, + { + "epoch": 0.20933165195460277, + "grad_norm": 76.26611328125, + "learning_rate": 1.6071428571428572e-05, + "loss": 1.7683, + "step": 83 + }, + { + "epoch": 0.21185372005044137, + "grad_norm": 56.9483757019043, + "learning_rate": 1.6020408163265308e-05, + "loss": 1.8371, + "step": 84 + }, + { + "epoch": 0.21437578814627994, + "grad_norm": 35.35915756225586, + "learning_rate": 1.596938775510204e-05, + "loss": 1.6537, + "step": 85 + }, + { + "epoch": 0.21689785624211855, + "grad_norm": 46.109214782714844, + "learning_rate": 1.5918367346938776e-05, + "loss": 1.7508, + "step": 86 + }, + { + "epoch": 0.21941992433795712, + "grad_norm": 47.85239791870117, + "learning_rate": 1.586734693877551e-05, + "loss": 1.8294, + "step": 87 + }, + { + "epoch": 0.22194199243379573, + "grad_norm": 70.48799133300781, + "learning_rate": 1.5816326530612247e-05, + "loss": 1.8041, + "step": 88 + }, + { + "epoch": 0.2244640605296343, + "grad_norm": 57.212501525878906, + "learning_rate": 1.576530612244898e-05, + "loss": 1.7635, + "step": 89 + }, + { + "epoch": 0.22698612862547288, + "grad_norm": 56.162498474121094, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.6948, + "step": 90 + }, + { + "epoch": 0.22950819672131148, + "grad_norm": 44.635799407958984, + "learning_rate": 1.566326530612245e-05, + "loss": 1.763, + "step": 91 + }, + { + "epoch": 0.23203026481715006, + "grad_norm": 71.88455963134766, + "learning_rate": 1.5612244897959187e-05, + "loss": 1.8955, + "step": 92 + }, + { + "epoch": 0.23455233291298866, + "grad_norm": 52.144920349121094, + "learning_rate": 1.556122448979592e-05, + "loss": 1.79, + "step": 93 + }, + { + "epoch": 0.23707440100882723, + "grad_norm": 46.399227142333984, + "learning_rate": 1.5510204081632655e-05, + "loss": 1.6578, + "step": 94 + }, + { + "epoch": 0.23959646910466584, + "grad_norm": 46.650047302246094, + "learning_rate": 1.545918367346939e-05, + "loss": 1.7317, + "step": 95 + } + ], + "logging_steps": 1, + "max_steps": 397, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0621866054180864e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/checkpoint-95/training_args.bin b/checkpoints/checkpoint-95/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/checkpoints/checkpoint-95/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3c1d04911c269b925af977a3151c9704e990e4d0 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8eeccd5b2fd61f2dd1f5191d5ff32bf56c0b79c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2066 @@ +{ + "add_bos_token": true, + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "right", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b504a7c08200fe330e60b6264694f2292ed3af66 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1148cb2048fcd487752a02491f5813c1726c78dde853c4e77411330e31f86fa1 +size 6033